xref: /xnu-12377.61.12/tests/skywalk/skywalk_test_utils.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2016-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
/* This file contains useful utility routines, but, contrary to
 * skywalk_test_common, does not operate on a single set of static objects.
 */
32 
33 /*
34  * Copyright (c) 1988, 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
38  */
39 
40 
41 #include <err.h>
42 #include <assert.h>
43 #include <inttypes.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <poll.h>
51 #include <sys/event.h>
52 #include <uuid/uuid.h>
53 #include <arpa/inet.h>
54 #include <stddef.h>
55 #include <sysexits.h>
56 #include <sys/types.h>
57 #include <sys/sysctl.h>
58 #include <net/if_utun.h>
59 #include <net/if_ipsec.h>
60 #include <netinet/ip6.h>
61 #include <sys/kern_control.h>
62 #include <sys/ioctl.h>
63 #include <sys/socket.h>
64 #include <sys/kern_control.h>
65 #include <sys/sys_domain.h>
66 #include <ifaddrs.h>
67 #include <sys/fcntl.h>
68 #include <sys/kern_control.h>
69 #include <sys/sys_domain.h>
70 #include <net/if_utun.h>
71 #include <os/log.h>
72 
73 #include <net/pfkeyv2.h>
74 #include <netinet6/ipsec.h>
75 #include <darwintest.h>
76 
77 #include "skywalk_test_driver.h"
78 #include "skywalk_test_common.h" // XXX remove this
79 #include "skywalk_test_utils.h"
80 
81 #define SIN(s)          ((struct sockaddr_in *)(void *)s)
82 #define SIN6(s)          ((struct sockaddr_in6 *)(void *)s)
83 
/*
 * Register a nexus provider on controller `ncd` using the parameters in
 * `sktc_attr`, and optionally allocate a provider instance.
 *
 * Any sktc_attr field equal to -1 is left at its kernel default.  After
 * registration the provider attributes are read back and asserted to match
 * what was requested, verifying the kernel honored each setting.
 *
 * On return *providerp holds the provider UUID; when instancep is non-NULL,
 * *instancep receives a freshly allocated instance UUID.
 */
void
sktc_build_nexus(nexus_controller_t ncd, struct sktc_nexus_attr *sktc_attr,
    uuid_t *providerp, uuid_t *instancep)
{
	nexus_attr_t attr;
	int error;
	uint64_t scratch;	/* read-back value for each attribute check */

	attr = os_nexus_attr_create();
	assert(attr);

	/* Apply only the attributes the caller explicitly requested (!= -1). */
	if (sktc_attr->anonymous != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_ANONYMOUS,
		    sktc_attr->anonymous);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->userchannel != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_USER_CHANNEL,
		    sktc_attr->userchannel);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->ntxrings != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_RINGS,
		    sktc_attr->ntxrings);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->nrxrings != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_RINGS,
		    sktc_attr->nrxrings);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->ntxslots != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS,
		    sktc_attr->ntxslots);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->nrxslots != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS,
		    sktc_attr->nrxslots);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->slotsize != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE,
		    sktc_attr->slotsize);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->metasize != -1) {
		/* setting the slot metadata size is expected to be rejected */
		error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_META_SIZE,
		    sktc_attr->metasize);
		SKTC_ASSERT_ERR(error == ENOTSUP);
	}
	if (sktc_attr->maxfrags != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
		    sktc_attr->maxfrags);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->rejectonclose != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_REJECT_ON_CLOSE,
		    sktc_attr->rejectonclose);
		SKTC_ASSERT_ERR(!error);
	}

	uuid_clear(*providerp);
	error = os_nexus_controller_register_provider(ncd,
	    sktc_attr->name, sktc_attr->type, attr, providerp);
	SKTC_ASSERT_ERR(!error);
	assert(!uuid_is_null(*providerp));

	/* Clear the parameters to make sure they are being read */
	error = os_nexus_attr_set(attr, NEXUS_ATTR_ANONYMOUS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_RINGS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_RINGS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, -1);
	SKTC_ASSERT_ERR(!error);
	/* metadata size is unsupported, so clearing it also returns ENOTSUP */
	error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_META_SIZE, -1);
	SKTC_ASSERT_ERR(error == ENOTSUP);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_REJECT_ON_CLOSE, -1);
	SKTC_ASSERT_ERR(!error);

	/* Re-read the provider's attributes from the kernel. */
	error = os_nexus_controller_read_provider_attr(ncd,
	    *providerp, attr);
	SKTC_ASSERT_ERR(!error);

	/*
	 * For each attribute: confirm the read populated scratch, and that
	 * any explicitly requested value round-tripped unchanged.
	 */
	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_ANONYMOUS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->anonymous == -1 || sktc_attr->anonymous == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_USER_CHANNEL, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->userchannel == -1 ||
	    sktc_attr->userchannel == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_TX_RINGS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->ntxrings == -1 || sktc_attr->ntxrings == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_RX_RINGS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->nrxrings == -1 || sktc_attr->nrxrings == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_TX_SLOTS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->ntxslots == -1 || sktc_attr->ntxslots == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_RX_SLOTS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->nrxslots == -1 || sktc_attr->nrxslots == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_SLOT_BUF_SIZE, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->slotsize == -1 || sktc_attr->slotsize == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_SLOT_META_SIZE, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->metasize == -1 || sktc_attr->metasize == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_MAX_FRAGS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->maxfrags == -1 || sktc_attr->maxfrags == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_REJECT_ON_CLOSE, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->rejectonclose == -1 ||
	    sktc_attr->rejectonclose == scratch);

	os_nexus_attr_destroy(attr);

	/* Optionally allocate an instance of the freshly registered provider. */
	if (instancep) {
		uuid_clear(*instancep);
		error = os_nexus_controller_alloc_provider_instance(ncd,
		    *providerp, instancep);
		SKTC_ASSERT_ERR(!error);
		assert(!uuid_is_null(*instancep));
	}
}
250 
251 /* up to 4 seconds of retries (250ms delay per retry) */
252 #define SKTU_CHANNEL_CREATE_NOMEM_RETRIES       16
253 
254 channel_t
sktu_channel_create_extended(const uuid_t uuid,const nexus_port_t port,const ring_dir_t dir,const ring_id_t rid,const channel_attr_t attr,uint64_t exclusive,uint64_t txlowatunit,uint64_t txlowatval,uint64_t rxlowatunit,uint64_t rxlowatval,uint64_t userpacketpool,uint64_t defunctok,uint64_t event_ring,uint64_t low_latency)255 sktu_channel_create_extended(const uuid_t uuid, const nexus_port_t port,
256     const ring_dir_t dir, const ring_id_t rid, const channel_attr_t attr,
257     uint64_t exclusive, uint64_t txlowatunit, uint64_t txlowatval,
258     uint64_t rxlowatunit, uint64_t rxlowatval, uint64_t userpacketpool,
259     uint64_t defunctok, uint64_t event_ring, uint64_t low_latency)
260 {
261 	channel_attr_t tmpattr;
262 	int error;
263 	uint64_t scratch;
264 	static struct timespec delay250ms = { .tv_sec = 0, .tv_nsec = 250000000 };
265 	uint32_t retries = 0;
266 	channel_t ret = NULL;
267 
268 	if (!attr) {
269 		tmpattr = os_channel_attr_create();
270 	} else {
271 		tmpattr = attr;
272 	}
273 
274 	if (exclusive != -1) {
275 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_EXCLUSIVE, exclusive);
276 		SKTC_ASSERT_ERR(!error);
277 	}
278 
279 	if (txlowatunit != -1) {
280 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_TX_LOWAT_UNIT, txlowatunit);
281 		SKTC_ASSERT_ERR(!error);
282 	}
283 
284 	if (txlowatval != -1) {
285 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_TX_LOWAT_VALUE, txlowatval);
286 		SKTC_ASSERT_ERR(!error);
287 	}
288 
289 	if (rxlowatunit != -1) {
290 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_RX_LOWAT_UNIT, rxlowatunit);
291 		SKTC_ASSERT_ERR(!error);
292 	}
293 
294 	if (rxlowatval != -1) {
295 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_RX_LOWAT_VALUE, rxlowatval);
296 		SKTC_ASSERT_ERR(!error);
297 	}
298 
299 	if (userpacketpool != -1) {
300 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_USER_PACKET_POOL, userpacketpool);
301 		SKTC_ASSERT_ERR(!error);
302 	}
303 
304 	if (defunctok != -1) {
305 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_NEXUS_DEFUNCT_OK, defunctok);
306 		SKTC_ASSERT_ERR(!error);
307 	}
308 
309 	if (event_ring != -1) {
310 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_EVENT_RING, event_ring);
311 		SKTC_ASSERT_ERR(!error);
312 	}
313 
314 	if (low_latency != -1) {
315 		error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_LOW_LATENCY, low_latency);
316 		SKTC_ASSERT_ERR(!error);
317 	}
318 
319 retry:
320 	ret = os_channel_create_extended(uuid, port, dir, rid, tmpattr);
321 	if (ret == NULL) {
322 		if (errno == ENOMEM && ++retries < SKTU_CHANNEL_CREATE_NOMEM_RETRIES) {
323 			nanosleep(&delay250ms, NULL);
324 			goto retry;
325 		}
326 		goto out;
327 	}
328 
329 	scratch = -1;
330 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_EXCLUSIVE, &scratch);
331 	SKTC_ASSERT_ERR(!error);
332 	assert(scratch != 1);
333 	assert(exclusive == -1 || exclusive == scratch);
334 
335 	scratch = -1;
336 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_TX_LOWAT_UNIT, &scratch);
337 	SKTC_ASSERT_ERR(!error);
338 	assert(scratch != -1);
339 	assert(exclusive == -1 || txlowatunit == scratch);
340 
341 	scratch = -1;
342 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_TX_LOWAT_VALUE, &scratch);
343 	SKTC_ASSERT_ERR(!error);
344 	assert(scratch != -1);
345 	assert(exclusive == -1 || txlowatval == scratch);
346 
347 	scratch = -1;
348 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_RX_LOWAT_UNIT, &scratch);
349 	SKTC_ASSERT_ERR(!error);
350 	assert(scratch != -1);
351 	assert(exclusive == -1 || rxlowatunit == scratch);
352 
353 	scratch = -1;
354 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_RX_LOWAT_VALUE, &scratch);
355 	SKTC_ASSERT_ERR(!error);
356 	assert(scratch != -1);
357 	assert(exclusive == -1 || rxlowatval == scratch);
358 
359 	scratch = -1;
360 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_USER_PACKET_POOL, &scratch);
361 	SKTC_ASSERT_ERR(!error);
362 	assert(scratch != -1);
363 	assert(exclusive == -1 || userpacketpool == scratch);
364 
365 	scratch = -1;
366 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_NEXUS_DEFUNCT_OK, &scratch);
367 	SKTC_ASSERT_ERR(!error);
368 	assert(scratch != -1);
369 	assert(exclusive == -1 || defunctok == scratch);
370 
371 	scratch = -1;
372 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_EVENT_RING, &scratch);
373 	SKTC_ASSERT_ERR(!error);
374 	assert(scratch != -1);
375 	assert(exclusive == -1 || event_ring == scratch);
376 
377 	scratch = -1;
378 	error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_LOW_LATENCY, &scratch);
379 	SKTC_ASSERT_ERR(!error);
380 	assert(scratch != -1);
381 	assert(exclusive == -1 || low_latency == scratch);
382 
383 out:
384 	if (!attr) {
385 		os_channel_attr_destroy(tmpattr);
386 	}
387 
388 	return ret;
389 }
390 
391 /****************************************************************/
392 
/* Exchange elements i and j of the permutation array. */
static inline void
swap(int *permute, int i, int j)
{
	int saved = permute[j];

	permute[j] = permute[i];
	permute[i] = saved;
}
400 
401 
402 /* Plain changes, see Knuth (7.2.1.2) "Algorithm P"
403  * has advantage of only swapping adjacent pairs
404  * This could be cleaned up to be more "C" like, but
405  * this literal translation works without fanfare.
406  */
/*
 * Visit all n! permutations of `permute`, calling func() on each.
 * Literal translation of Knuth TAOCP 7.2.1.2 Algorithm P ("plain
 * changes"); each visit differs from the previous by one adjacent swap.
 */
void
permutefuncP(int n, int *permute, void (*func)(int, int *permute))
{
	int j, s, q;
	/* c[]: per-index change counters; o[]: per-index direction (+1/-1) */
	int c[n], o[n];
	/* P1 Initialize. */
	for (j = 0; j < n; j++) {
		c[j] = 0;
		o[j] = 1;
	}
p2:
	/* P2 Visit: invoke the callback on the current permutation. */
	func(n, permute);
	/* P3 Prepare for change. */
	j = n;
	s = 0;
p4:
	/* P4 Ready to change? */
	q = c[j - 1] + o[j - 1];
	if (q < 0) {
		goto p7;
	}
	if (q == j) {
		goto p6;
	}
	/* P5 Change: swap the adjacent pair selected by c/o for index j. */
	{
		//T_LOG("Swapping %d with %d\n", j-c[j-1]+s-1, j-q+s-1);
		swap(permute, j - c[j - 1] + s - 1, j - q + s - 1);
	}
	c[j - 1] = q;
	goto p2;
p6:     /* P6 Increase s; j == 1 means every permutation has been visited. */
	if (j == 1) {
		return;
	}
	s++;
p7:     /* P7 Switch Direction */
	o[j - 1] = -o[j - 1];
	j--;
	goto p4;
}
449 
450 /* Heap's algorithm */
/*
 * Visit all n! permutations of `permute` using Heap's algorithm,
 * calling func() on each and logging progress about once a second.
 */
void
permutefuncH(int n, int *permute, void (*func)(int, int *permute))
{
	time_t begin = time(NULL);
	time_t now, last = begin;
	int visited = 0;
	int nperms = 1;
	int k;
	int stack[n];

	for (k = 0; k < n; k++) {
		stack[k] = 0;
	}
	/* nperms = n! */
	for (k = 2; k <= n; k++) {
		nperms *= k;
	}

	/* visit the identity arrangement first */
	visited++;
	func(n, permute);

	k = 0;
	while (k < n) {
		if (stack[k] < k) {
			/* even index swaps with 0, odd with stack[k] */
			swap(permute, k, (k & 1) ? stack[k] : 0);
			visited++;
			now = time(NULL);
			if (now > last) {
				T_LOG("time %ld on %d of %d (%2.2f%%, est %ld secs left)\n",
				    now - begin, visited, nperms,
				    (double)visited * 100 / nperms,
				    (long)((double)(now - begin) * nperms / visited) - (now - begin));
				last = now;
			}
			func(n, permute);
			stack[k] += 1;
			k = 0;
		} else {
			stack[k] = 0;
			k++;
		}
	}
	now = time(NULL);
	T_LOG("total time %ld for %d permutations (rate %.2f)\n",
	    now - begin, nperms, (double)nperms / (now - begin));
}
496 
497 /* Random permutations, knuth's shuffle */
498 
/*
 * Visit `total` pseudo-random permutations of `permute`, seeded by `seed`,
 * calling func() on each and logging progress about once a second.
 *
 * NOTE(review): j is drawn from [0, i-1] rather than [0, i], so this is
 * Sattolo's cyclic variant, not a uniform Fisher-Yates shuffle, and
 * "% i" carries modulo bias (as the original XXX noted) -- confirm this
 * is acceptable for test coverage.
 */
void
permutefuncR(int n, int *permute, void (*func)(int, int *permute), int total, unsigned seed)
{
	time_t begin = time(NULL);
	time_t now, last = begin;
	int done = 0;

	T_LOG("Starting %d random permutations with seed %u\n", total, seed);
	srandom(seed);

	while (done < total) {
		int i;

		for (i = n - 1; i > 0; i--) {
			int j = random() % i; // XXX modulo bias.
			swap(permute, i, j);
		}
		done++;
		now = time(NULL);
		if (now > last) {
			T_LOG("time %ld on %d of %d (%2.2f%%, est %ld secs left)\n",
			    now - begin, done, total,
			    (double)done * 100 / total,
			    (long)((double)(now - begin) * total / done) - (now - begin));
			last = now;
		}
		func(n, permute);
	}
	now = time(NULL);
	T_LOG("total time %ld for %d permutations (rate %.2f)\n",
	    now - begin, total, (double)total / (now - begin));
}
529 
530 
531 /*
532  * rakes each element across all other elements.
533  */
/*
 * "Rake" each element across all other positions: for every index i,
 * restore the original ordering, then bubble element i step-by-step to
 * the front and, again from the original ordering, to the back, calling
 * func() after each single adjacent swap.
 */
void
permutefuncZ(int n, int *permute, void (*func)(int, int *permute))
{
	int original[n];
	int j;

	memcpy(original, permute, sizeof(original));
	func(n, permute);

	for (int i = 0; i < n; i++) {
		/* rake element i toward the front */
		memcpy(permute, original, sizeof(original));
		for (j = i; j > 0; j--) {
			swap(permute, j, j - 1);
			func(n, permute);
		}

		/* rake element i toward the back */
		memcpy(permute, original, sizeof(original));
		for (j = i; j < n - 1; j++) {
			swap(permute, j, j + 1);
			/* the first rightward step repeats the last leftward one */
			if (j != i) {
				func(n, permute);
			}
		}
	}
}
558 
559 /****************************************************************/
560 
/*
 * Create a flowswitch nexus (and, if one is not already present, a netif
 * nexus) for the interface named in handles->netif_ifname, then attach
 * the flowswitch on top of the netif.
 *
 * ntxslots/nrxslots/buf_size/max_frags/anonymous are passed through to
 * sktc_build_nexus(); -1 means "use the default".
 *
 * On any failure a message is logged and the function returns early,
 * leaving `handles` partially initialized.
 */
void
sktc_create_flowswitch_no_address(struct sktc_nexus_handles *handles,
    uint64_t ntxslots, uint64_t nrxslots, uint64_t buf_size, uint64_t max_frags,
    uint64_t anonymous)
{
	char buf[256];
	int error;
	struct sktc_nexus_attr attr = SKTC_NEXUS_ATTR_INIT();

	attr.ntxslots = ntxslots;
	attr.nrxslots = nrxslots;
	attr.slotsize = buf_size;
	attr.anonymous = anonymous;
	attr.maxfrags = max_frags;

	/* validate the interface name before touching the kernel */
	if (handles->netif_ifname[0] == '\0') {
		T_LOG("%s: no interface name specified\n",
		    __func__);
		return;
	}
	if (strlen(handles->netif_ifname) >= IFNAMSIZ) {
		T_LOG("%s: invalid interface name specified %s\n",
		    __func__, handles->netif_ifname);
		return;
	}
	handles->controller = os_nexus_controller_create();
	if (handles->controller == NULL) {
		SKT_LOG(
			"%s: os_nexus_controller_create failed, %s (%d)\n",
			__func__, strerror(errno), errno);
		return;
	}

	/* register the flowswitch provider and allocate an instance */
	snprintf(buf, sizeof(buf), "ms_fsw_%s", handles->netif_ifname);
	/*
	 * NOTE(review): strncpy() does not NUL-terminate when buf exceeds
	 * sizeof(nexus_name_t) - 1; this relies on SKTC_NEXUS_ATTR_INIT()
	 * having zeroed attr.name -- confirm.
	 */
	strncpy((char *)attr.name, buf, sizeof(nexus_name_t) - 1);
	attr.type = NEXUS_TYPE_FLOW_SWITCH;
	sktc_build_nexus(handles->controller, &attr, &handles->fsw_prov_uuid,
	    &handles->fsw_nx_uuid);

	/* if the netif is already present, don't bother creating/attaching */
	if (!sktc_get_netif_nexus(handles->netif_ifname,
	    handles->netif_nx_uuid)) {
		snprintf(buf, sizeof(buf), "netif_%s", handles->netif_ifname);
		strncpy((char *)attr.name, buf, sizeof(nexus_name_t) - 1);
		attr.type = NEXUS_TYPE_NET_IF;
		/* netif nexus uses default slot counts */
		attr.ntxslots = -1;
		attr.nrxslots = -1;
		sktc_build_nexus(handles->controller, &attr,
		    &handles->netif_prov_uuid, &handles->netif_nx_uuid);
		error = __os_nexus_ifattach(handles->controller,
		    handles->netif_nx_uuid,
		    handles->netif_ifname, NULL,
		    false,
		    &handles->netif_nx_attach_uuid);
		if (error != 0) {
			SKT_LOG(
				"__os_nexus_ifattach(%s) failed, %s (%d)\n",
				buf, strerror(errno), errno);
			return;
		}
	}
	/* attach the flowswitch on top of the netif nexus */
	error = __os_nexus_ifattach(handles->controller, handles->fsw_nx_uuid,
	    NULL, handles->netif_nx_uuid, false, &handles->fsw_nx_dev_attach_uuid);
	if (error != 0) {
		SKT_LOG("__os_nexus_ifattach() failed, %s (%d)\n",
		    strerror(errno), errno);
		return;
	}
}
630 
631 
632 void
sktc_nexus_handles_assign_address(struct sktc_nexus_handles * handles)633 sktc_nexus_handles_assign_address(struct sktc_nexus_handles *handles)
634 {
635 	int             error;
636 
637 	error = sktc_ifnet_add_addr(handles->netif_ifname,
638 	    &handles->netif_addr,
639 	    &handles->netif_mask, NULL);
640 	SKTC_ASSERT_ERR(!error);
641 }
642 
643 void
sktc_create_flowswitch(struct sktc_nexus_handles * handles,int i)644 sktc_create_flowswitch(struct sktc_nexus_handles *handles, int i)
645 {
646 	uint16_t        val;
647 
648 	/* assign the name */
649 	snprintf(handles->netif_ifname, sizeof(handles->netif_ifname),
650 	    FETH_FORMAT, i);
651 
652 	/* pick/assign a random IPv4LL address */
653 	val = random() % 0xffff;
654 	/* avoid subnet broadcast and host address 0 */
655 	if (((val & 0xff) == 0) || ((val & 0xff) == 0xff)) {
656 		val = (val & 0xfff0) | 0x2;
657 	}
658 	handles->netif_addr = sktc_make_in_addr(IN_LINKLOCALNETNUM | val);
659 	handles->netif_mask = sktc_make_in_addr(IN_CLASSC_NET);
660 	sktc_nexus_handles_assign_address(handles);
661 
662 	/* create the flowswitch */
663 	sktc_create_flowswitch_no_address(handles, -1, -1, -1, -1, 1);
664 }
665 
/*
 * Tear down what sktc_create_flowswitch() set up: free the flowswitch
 * instance, deregister its provider, destroy the controller, and remove
 * the interface address.  Asserts that the handles were fully created.
 */
void
sktc_cleanup_flowswitch(struct sktc_nexus_handles *handles)
{
	int error;

	assert(handles->controller);
	assert(!uuid_is_null(handles->fsw_prov_uuid));
	assert(!uuid_is_null(handles->fsw_nx_uuid));

	error = os_nexus_controller_free_provider_instance(handles->controller,
	    handles->fsw_nx_uuid);
	SKTC_ASSERT_ERR(!error);

	error = os_nexus_controller_deregister_provider(handles->controller,
	    handles->fsw_prov_uuid);
	SKTC_ASSERT_ERR(!error);

	os_nexus_controller_destroy(handles->controller);

	/* remove the address sktc_nexus_handles_assign_address() added */
	error = sktc_ifnet_del_addr(handles->netif_ifname, &handles->netif_addr);
	SKTC_ASSERT_ERR(!error);
}
688 
689 /****************************************************************/
690 
/*
 * Bind a TCP/IPv4 flow (INADDR_ANY:in_port) to nexus port nx_port on
 * flowswitch `fsw`.  When in_port is 0 the kernel assigns an ephemeral
 * port, which is validated against net.inet.ip.portrange.{first,last}.
 * Returns the result of __os_nexus_flow_add().
 */
int
sktc_bind_tcp4_flow(nexus_controller_t ncd, const uuid_t fsw, in_port_t in_port, nexus_port_t nx_port, const uuid_t flow)
{
	struct nx_flow_req nfr;
	int error;

	/* build the flow request: TCP over INADDR_ANY at the given port */
	memset(&nfr, 0, sizeof(nfr));
	nfr.nfr_ip_protocol = IPPROTO_TCP;
	nfr.nfr_nx_port = nx_port;
	nfr.nfr_saddr.sa.sa_len = sizeof(struct sockaddr_in);
	nfr.nfr_saddr.sa.sa_family = AF_INET;
	nfr.nfr_saddr.sin.sin_port = htons(in_port);
	nfr.nfr_saddr.sin.sin_addr.s_addr = htonl(INADDR_ANY);
	uuid_copy(nfr.nfr_flow_uuid, flow);

#if 0
	char buf[31];
	uuid_string_t uuidstr;
	uuid_unparse(nfr.nfr_flow_uuid, uuidstr);
	inet_ntop(AF_INET, &nfr.nfr_saddr.sin.sin_addr.s_addr, buf, sizeof(buf));
	T_LOG("before: nx_port %3d Flow %s %s addr %s port %d\n",
	    nfr.nfr_nx_port, uuidstr, (nfr.nfr_ip_protocol == IPPROTO_TCP) ? "tcp" : "udp",
	    buf, ntohs(nfr.nfr_saddr.sin.sin_port));
#endif

	error = __os_nexus_flow_add(ncd, fsw, &nfr);
#if 0
	if (error) {
		T_LOG("__os_nexus_flow_add returned %d, errno %d\n", error, errno);
	}
#endif

#if 0
	uuid_unparse(nfr.nfr_flow_uuid, uuidstr);
	inet_ntop(AF_INET, &nfr.nfr_saddr.sin.sin_addr.s_addr, buf, sizeof(buf));
	T_LOG("after:  nx_port %3d Flow %s %s addr %s port %d\n",
	    nfr.nfr_nx_port, uuidstr, (nfr.nfr_ip_protocol == IPPROTO_TCP) ? "tcp" : "udp",
	    buf, ntohs(nfr.nfr_saddr.sin.sin_port));
#endif

	// XXX fails, see the fswbind25 for standalone test for this
	assert(nfr.nfr_nx_port == nx_port);
	/* NOTE(review): logged even when the caller chose a fixed port */
	SKT_LOG("got ephemeral port %d\n", ntohs(nfr.nfr_saddr.sin.sin_port));

	/* Validate the ephemeral ports */
	if (!error && !in_port) {
		/* cache the sysctl'd port range across calls */
		static int first, last;
		if (!first && !last) {
			size_t size;

			/*
			 * NOTE(review): `error` is reused for the sysctl
			 * results below; SKTC_ASSERT_ERR keeps them zero, so
			 * the returned value still reflects flow_add.
			 */
			size = sizeof(first);
			error = sysctlbyname("net.inet.ip.portrange.first", &first, &size, NULL, 0);
			SKTC_ASSERT_ERR(!error);
			assert(size == sizeof(first));

			size = sizeof(last);
			error = sysctlbyname("net.inet.ip.portrange.last", &last, &size, NULL, 0);
			SKTC_ASSERT_ERR(!error);
			assert(size == sizeof(last));

			T_LOG("ephemeral port range first %d last %d\n", first, last);

			/* normalize so first <= last regardless of config */
			if (last < first) {
				int tmp = first;
				first = last;
				last = tmp;
			}
			assert(first <= last);
		}
		assert(ntohs(nfr.nfr_saddr.sin.sin_port) >= first);
		assert(ntohs(nfr.nfr_saddr.sin.sin_port) <= last);
	}

	return error;
}
766 
/*
 * Remove flow `flow` from flowswitch `fsw`.
 * Returns the result of __os_nexus_flow_del() (logged on failure).
 */
int
sktc_unbind_flow(nexus_controller_t ncd, const uuid_t fsw, const uuid_t flow)
{
	struct nx_flow_req nfr;
	int error;

	memset(&nfr, 0, sizeof(nfr));
	uuid_copy(nfr.nfr_flow_uuid, flow);

	error = __os_nexus_flow_del(ncd, fsw, &nfr);
	if (error) {
		/* fixed: log previously misreported this as flow_add */
		SKT_LOG("__os_nexus_flow_del returned %d, errno %d\n", error, errno);
	}
	return error;
}
782 
783 /****************************************************************/
784 
/*
 * Consume a random number of slots (0..nslots inclusive) on `ring`.
 * For TX, each slot is also given a random payload length below the
 * buflet's data limit.  If `dosync` is set, the channel is synced in
 * `mode` afterwards.  Returns the number of slots actually consumed.
 */
uint32_t
sktc_chew_random(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint32_t nslots)
{
	uint64_t count = 0;
	int error;
	channel_slot_t slot;

	/* Chew a random number of slots */
	nslots = random() % (nslots + 1);

	slot = NULL;
	while (count < nslots) {
		slot_prop_t prop;

		slot = os_channel_get_next_slot(ring, slot, &prop);
		assert(slot);
		if (mode == CHANNEL_SYNC_TX) {
			packet_t pkt = os_channel_slot_get_packet(ring, slot);
			buflet_t buf = os_packet_get_next_buflet(pkt, NULL);
			assert(buf != NULL);
			uint16_t bdlim = os_buflet_get_data_limit(buf);
			assert(bdlim != 0);
			/* randomize the TX payload length (0..bdlim-1) */
			prop.sp_len = random() % bdlim;
			os_channel_set_slot_properties(ring, slot, &prop);
		}
		count++;
	}

	/* advance past the last slot touched; no-op when nslots was 0 */
	if (slot) {
		error = os_channel_advance_slot(ring, slot);
		SKTC_ASSERT_ERR(!error);
	}

	if (dosync) {
		error = os_channel_sync(channel, mode);
		/* in driver mode a sync failure is only logged, not fatal */
		if (skywalk_in_driver && error) {
			SKT_LOG("%s: sync fail error %d errno %d: %s\n", __func__, error, errno, strerror(errno));
		} else {
			SKTC_ASSERT_ERR(!error);
		}
	}

	return count;
}
829 
830 /* This pumps slots on a ring until count slots have been tranferred */
/*
 * Pump `ring` until `nslots` slots have been transferred, blocking on a
 * kqueue (EVFILT_WRITE for TX, EVFILT_READ otherwise) whenever the ring
 * has no available slots.  With `verbose`, progress is logged roughly
 * once a second.
 */
void
sktc_pump_ring_nslots_kq(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint64_t nslots, bool verbose)
{
	uint64_t count = 0;
	int channelfd;
	int kq;
	struct kevent kev;
	int error;
	time_t start, then;	/* only initialized/used when verbose */

	channelfd = os_channel_get_fd(channel);
	assert(channelfd != -1);

	/* register the channel fd for readiness notifications */
	kq = kqueue();
	assert(kq != -1);
	EV_SET(&kev, channelfd,
	    mode == CHANNEL_SYNC_TX ? EVFILT_WRITE : EVFILT_READ,
	    EV_ADD | EV_ENABLE, 0, 0, NULL);
	error = kevent(kq, &kev, 1, NULL, 0, NULL);
	SKTC_ASSERT_ERR(!error);

	if (verbose) {
		then = start = time(NULL);
	}

	while (count < nslots) {
		uint32_t avail;

		if (verbose) {
			time_t now = time(NULL);
			if (now > then) {
				T_LOG("time %ld pump %"PRId64" of %"PRId64" (%2.2f%%, est %ld secs left)\n",
				    now - start, count, nslots,
				    (double)count * 100 / nslots,
				    (long)((double)(now - start) * nslots / count) - (now - start));
				then = now;
			}
		}

		avail = os_channel_available_slot_count(ring);

		if (!avail) {
			int error;

			/* block until the channel fd becomes ready */
			memset(&kev, 0, sizeof(kev));
			error = kevent(kq, NULL, 0, &kev, 1, NULL);
			SKTC_ASSERT_ERR(error != -1);
			SKTC_ASSERT_ERR(error == 1);

			/* the event must be ours and of the expected filter */
			assert(kev.ident == channelfd);
			if (mode == CHANNEL_SYNC_TX) {
				assert(kev.filter == EVFILT_WRITE);
			} else {
				assert(kev.filter == EVFILT_READ);
			}

			avail = os_channel_available_slot_count(ring);
			assert(avail);
		}

		/* consume at most what's available and what's still needed */
		count += sktc_chew_random(channel, ring, mode, dosync, MIN(nslots - count, avail));
	}

	if (verbose) {
		time_t now = time(NULL);
		T_LOG("total time %ld for %"PRId64" slots (rate %.2f)\n",
		    now - start, nslots, (double)nslots / (now - start));
	}

	error = close(kq);
	SKTC_ASSERT_ERR(!error);
}
903 
/*
 * Pump `ring` until `nslots` slots have been transferred, blocking in
 * select() (write readiness for TX, read readiness otherwise) whenever
 * the ring has no available slots.  With `verbose`, progress is logged
 * roughly once a second.
 */
void
sktc_pump_ring_nslots_select(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint64_t nslots, bool verbose)
{
	uint64_t count = 0;
	int channelfd;
	/* zerofds stays empty, used to verify no unexpected fds fire */
	fd_set readfds, writefds, errorfds, zerofds;
	time_t start, then;	/* only initialized/used when verbose */

	channelfd = os_channel_get_fd(channel);
	assert(channelfd != -1);

	FD_ZERO(&zerofds);
	FD_ZERO(&readfds);
	FD_ZERO(&writefds);
	FD_ZERO(&errorfds);
	if (mode == CHANNEL_SYNC_TX) {
		FD_SET(channelfd, &writefds);
	} else {
		FD_SET(channelfd, &readfds);
	}

	if (verbose) {
		then = start = time(NULL);
	}

	while (count < nslots) {
		uint32_t avail;

		if (verbose) {
			time_t now = time(NULL);
			if (now > then) {
				T_LOG("time %ld pump %"PRId64" of %"PRId64" (%2.2f%%, est %ld secs left)\n",
				    now - start, count, nslots,
				    (double)count * 100 / nslots,
				    (long)((double)(now - start) * nslots / count) - (now - start));
				then = now;
			}
		}

		avail = os_channel_available_slot_count(ring);

		if (!avail) {
			int error;

			/*
			 * select() mutates the fd sets; the asserts below
			 * verify only the expected channelfd bit survives,
			 * which also leaves the sets primed for next time.
			 */
			FD_SET(channelfd, &errorfds);
			error = select(channelfd + 1, &readfds, &writefds, &errorfds, NULL);
			SKTC_ASSERT_ERR(error != -1);
			assert(!memcmp(&zerofds, &errorfds, sizeof(zerofds)));
			if (mode == CHANNEL_SYNC_TX) {
				assert(FD_ISSET(channelfd, &writefds));
				assert(!memcmp(&zerofds, &readfds, sizeof(zerofds)));
			} else {
				assert(FD_ISSET(channelfd, &readfds));
				assert(!memcmp(&zerofds, &writefds, sizeof(zerofds)));
			}
			SKTC_ASSERT_ERR(error == 1);

			avail = os_channel_available_slot_count(ring);
			assert(avail);
		}

		/* consume at most what's available and what's still needed */
		count += sktc_chew_random(channel, ring, mode, dosync, MIN(nslots - count, avail));
	}

	if (verbose) {
		time_t now = time(NULL);
		T_LOG("total time %ld for %"PRId64" slots (rate %.2f)\n",
		    now - start, nslots, (double)nslots / (now - start));
	}
}
974 
975 void
sktc_pump_ring_nslots_poll(channel_t channel,channel_ring_t ring,sync_mode_t mode,bool dosync,uint64_t nslots,bool verbose)976 sktc_pump_ring_nslots_poll(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint64_t nslots, bool verbose)
977 {
978 	uint64_t count = 0;
979 	int channelfd;
980 	struct pollfd fds;
981 	time_t start, then;
982 
983 	channelfd = os_channel_get_fd(channel);
984 	assert(channelfd != -1);
985 
986 	fds.fd = channelfd;
987 	if (mode == CHANNEL_SYNC_TX) {
988 		fds.events = POLLWRNORM;
989 	} else {
990 		fds.events = POLLRDNORM;
991 	}
992 
993 	if (verbose) {
994 		then = start = time(NULL);
995 	}
996 
997 	while (count < nslots) {
998 		uint32_t avail;
999 
1000 		if (verbose) {
1001 			time_t now = time(NULL);
1002 			if (now > then) {
1003 				T_LOG("time %ld pump %"PRId64" of %"PRId64" (%2.2f%%, est %ld secs left)\n",
1004 				    now - start, count, nslots,
1005 				    (double)count * 100 / nslots,
1006 				    (long)((double)(now - start) * nslots / count) - (now - start));
1007 				then = now;
1008 			}
1009 		}
1010 
1011 		avail = os_channel_available_slot_count(ring);
1012 
1013 		if (!avail) {
1014 			int error;
1015 
1016 			error = poll(&fds, 1, -1);
1017 			SKTC_ASSERT_ERR(error != -1);
1018 			SKTC_ASSERT_ERR(error == 1);
1019 			assert(fds.fd == channelfd);
1020 			if (mode == CHANNEL_SYNC_TX) {
1021 				assert(fds.events == POLLWRNORM);
1022 				assert(fds.revents == POLLWRNORM);
1023 			} else {
1024 				assert(fds.events == POLLRDNORM);
1025 				assert(fds.revents == POLLRDNORM);
1026 			}
1027 
1028 			avail = os_channel_available_slot_count(ring);
1029 			assert(avail);
1030 		}
1031 
1032 		count += sktc_chew_random(channel, ring, mode, dosync, MIN(nslots - count, avail));
1033 	}
1034 
1035 	if (verbose) {
1036 		time_t now = time(NULL);
1037 		T_LOG("total time %ld for %"PRId64" slots (rate %.2f)\n",
1038 		    now - start, nslots, (double)nslots / (now - start));
1039 	}
1040 }
1041 
1042 /****************************************************************/
1043 
/*
 * Make sure the process's RLIMIT_NOFILE soft limit is at least 'new';
 * when it is lower, both the soft and hard limits are raised to 'new'.
 */
void
sktc_raise_file_limit(int new)
{
	struct rlimit lim;
	int ret;

	ret = getrlimit(RLIMIT_NOFILE, &lim);
	SKTC_ASSERT_ERR(!ret);

	if (lim.rlim_cur >= new) {
		return;		/* already high enough */
	}

	T_LOG("raising file open limit from %llu (max %llu) to %d\n",
	    lim.rlim_cur, lim.rlim_max, new);
	lim.rlim_cur = new;
	lim.rlim_max = new;
	ret = setrlimit(RLIMIT_NOFILE, &lim);
	SKTC_ASSERT_ERR(!ret);
}
1062 
1063 
1064 /****************************************************************/
1065 
1066 int
sktu_create_interface(sktu_if_type_t type,sktu_if_flag_t flags)1067 sktu_create_interface(sktu_if_type_t type, sktu_if_flag_t flags)
1068 {
1069 	struct ctl_info kernctl_info;
1070 	struct sockaddr_ctl kernctl_addr;
1071 	int error;
1072 	int tunsock;
1073 	const char *CONTROL_NAME;
1074 	int OPT_ENABLE_NETIF, OPT_ATTACH_FSW, OPT_ENABLE_CHANNEL;
1075 	int enable_netif, attach_fsw, enable_channel;
1076 	int scratch;
1077 
1078 	assert(type == SKTU_IFT_UTUN || type == SKTU_IFT_IPSEC);
1079 	if (type == SKTU_IFT_UTUN) {
1080 		CONTROL_NAME = UTUN_CONTROL_NAME;
1081 		OPT_ENABLE_NETIF = UTUN_OPT_ENABLE_NETIF;
1082 		OPT_ATTACH_FSW = UTUN_OPT_ATTACH_FLOWSWITCH;
1083 		OPT_ENABLE_CHANNEL = UTUN_OPT_ENABLE_CHANNEL;
1084 	} else {
1085 		CONTROL_NAME = IPSEC_CONTROL_NAME;
1086 		OPT_ENABLE_NETIF = IPSEC_OPT_ENABLE_NETIF;
1087 		OPT_ATTACH_FSW = 0;
1088 		OPT_ENABLE_CHANNEL = IPSEC_OPT_ENABLE_CHANNEL;
1089 	}
1090 
1091 	enable_netif = ((flags & SKTU_IFF_ENABLE_NETIF) != 0) ? 1 : 0;
1092 	attach_fsw = ((flags & SKTU_IFF_NO_ATTACH_FSW) != 0) ? 0 : 1;
1093 	enable_channel = ((flags & SKTU_IFF_ENABLE_CHANNEL) != 0) ? 1 : 0;
1094 
1095 	/* XXX Remove this retry nonsense when this is fixed:
1096 	 * <rdar://problem/37340313> creating an interface without specifying specific interface name should not return EBUSY
1097 	 */
1098 
1099 	for (int i = 0; i < 10; i++) {
1100 		if (i > 0) {
1101 			T_LOG("%s: sleeping 1ms before retrying\n", __func__);
1102 			usleep(1000);
1103 		}
1104 
1105 		tunsock = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
1106 		assert(tunsock != -1);
1107 
1108 		memset(&kernctl_info, 0, sizeof(kernctl_info));
1109 		strlcpy(kernctl_info.ctl_name, CONTROL_NAME, sizeof(kernctl_info.ctl_name));
1110 		error = ioctl(tunsock, CTLIOCGINFO, &kernctl_info);
1111 		SKTC_ASSERT_ERR(error == 0);
1112 
1113 		memset(&kernctl_addr, 0, sizeof(kernctl_addr));
1114 		kernctl_addr.sc_len = sizeof(kernctl_addr);
1115 		kernctl_addr.sc_family = AF_SYSTEM;
1116 		kernctl_addr.ss_sysaddr = AF_SYS_CONTROL;
1117 		kernctl_addr.sc_id = kernctl_info.ctl_id;
1118 		kernctl_addr.sc_unit = 0;
1119 
1120 		/* If this is being called to reinstantiate a device that was just detached,
1121 		 * then this may return busy while the asynchronous detach completes.
1122 		 * This only occurs when this is being called in a tight loop
1123 		 * as per the utun27646755 test below
1124 		 */
1125 
1126 		error = bind(tunsock, (struct sockaddr *)&kernctl_addr, sizeof(kernctl_addr));
1127 		if (error == -1 && errno == EBUSY) {
1128 			close(tunsock);
1129 			tunsock = -1;
1130 			T_LOG("%s: i = %d bind returned EBUSY\n", __func__, i);
1131 			continue;
1132 		}
1133 
1134 		/* can only be set before connecting */
1135 		error = setsockopt(tunsock, SYSPROTO_CONTROL, OPT_ENABLE_NETIF, &enable_netif, sizeof(enable_netif));
1136 		SKTC_ASSERT_ERR(!error);
1137 		socklen_t scratchlen = sizeof(scratch);
1138 		error = getsockopt(tunsock, SYSPROTO_CONTROL, OPT_ENABLE_NETIF, &scratch, &scratchlen);
1139 		SKTC_ASSERT_ERR(!error);
1140 		assert(scratchlen == sizeof(scratch));
1141 		assert(enable_netif == scratch);
1142 
1143 		error = setsockopt(tunsock, SYSPROTO_CONTROL, OPT_ENABLE_CHANNEL, &enable_channel, sizeof(enable_channel));
1144 		SKTC_ASSERT_ERR(!error);
1145 		scratchlen = sizeof(scratch);
1146 		error = getsockopt(tunsock, SYSPROTO_CONTROL, OPT_ENABLE_CHANNEL, &scratch, &scratchlen);
1147 		SKTC_ASSERT_ERR(!error);
1148 		assert(scratchlen == sizeof(scratch));
1149 		assert(enable_channel == scratch);
1150 
1151 		/* only applicable for utun */
1152 		if (type == SKTU_IFT_UTUN) {
1153 			error = setsockopt(tunsock, SYSPROTO_CONTROL, OPT_ATTACH_FSW, &attach_fsw, sizeof(attach_fsw));
1154 			SKTC_ASSERT_ERR(!error);
1155 		}
1156 
1157 		error = connect(tunsock, (struct sockaddr *)&kernctl_addr, sizeof(kernctl_addr));
1158 		if (error == -1 && errno == EBUSY) {
1159 			T_LOG("%s: i = %d connect returned EBUSY\n", __func__, i);
1160 			close(tunsock);
1161 			tunsock = -1;
1162 			continue;
1163 		}
1164 		SKTC_ASSERT_ERR(!error);
1165 
1166 		error = fcntl(tunsock, F_SETFD, FD_CLOEXEC);
1167 		if (error != 0) {
1168 			warn("FD_CLOEXEC");
1169 		}
1170 
1171 		break;
1172 	}
1173 
1174 	if (error == -1) {
1175 		warn("Failed to create utun errno %d", errno);
1176 		close(tunsock);
1177 		tunsock = -1;
1178 	}
1179 
1180 	return tunsock;
1181 }
1182 
1183 channel_t
sktu_create_interface_channel(sktu_if_type_t type,int tunsock)1184 sktu_create_interface_channel(sktu_if_type_t type, int tunsock)
1185 {
1186 	uuid_t uuid;
1187 	channel_attr_t attr;
1188 	channel_t channel;
1189 	socklen_t uuidlen;
1190 	int error;
1191 	int OPT_GET_CHANNEL_UUID;
1192 
1193 	if (type == SKTU_IFT_UTUN) {
1194 		OPT_GET_CHANNEL_UUID = UTUN_OPT_GET_CHANNEL_UUID;
1195 	} else {
1196 		assert(type == SKTU_IFT_IPSEC);
1197 		OPT_GET_CHANNEL_UUID = IPSEC_OPT_GET_CHANNEL_UUID;
1198 	}
1199 
1200 	uuidlen = sizeof(uuid);
1201 	error = getsockopt(tunsock, SYSPROTO_CONTROL, OPT_GET_CHANNEL_UUID, uuid, &uuidlen);
1202 	SKTC_ASSERT_ERR(error == 0);
1203 	assert(uuidlen == sizeof(uuid));
1204 
1205 	attr = NULL;
1206 	channel = sktu_channel_create_extended(uuid,
1207 	    NEXUS_PORT_KERNEL_PIPE_CLIENT,
1208 	    CHANNEL_DIR_TX_RX, CHANNEL_RING_ID_ANY, attr,
1209 	    -1, -1, -1, -1, -1, -1, 1, -1, -1);
1210 	assert(channel);
1211 
1212 	return channel;
1213 }
1214 
1215 void
sktu_get_interface_name(sktu_if_type_t type,int s,char name[IFNAMSIZ])1216 sktu_get_interface_name(sktu_if_type_t type, int s, char name[IFNAMSIZ])
1217 {
1218 	int error;
1219 	socklen_t  optlen = IFNAMSIZ;
1220 	if (type == SKTU_IFT_UTUN) {
1221 		error = getsockopt(s, SYSPROTO_CONTROL, UTUN_OPT_IFNAME, name, &optlen);
1222 	} else {
1223 		error = getsockopt(s, SYSPROTO_CONTROL, IPSEC_OPT_IFNAME, name, &optlen);
1224 	}
1225 	SKTC_ASSERT_ERR(!error);
1226 }
1227 
/*
 * Hex + ASCII dump of 'len' bytes at 'buf' to stream 'f', 16 bytes per
 * line, each line prefixed with its offset.  Non-printable bytes show
 * as '.' in the ASCII column.  An optional 'desc' line is printed
 * first.
 */
void
sktu_dump_buffer(FILE *f, const char *desc, const void *buf, size_t len)
{
	size_t i;
	unsigned char buff[17];		/* ASCII column for one 16-byte row */
	const unsigned char *pc = (const unsigned char *)buf;

	if (desc != NULL) {
		fprintf(f, "%s:\n", desc);
	}

	if (len == 0) {
		fprintf(f, "  ZERO LENGTH\n");
		return;
	}

	/*
	 * The index is size_t (the original used int, making 'i < len' a
	 * signed/unsigned comparison and overflowing for len > INT_MAX).
	 */
	for (i = 0; i < len; i++) {
		if ((i % 16) == 0) {
			/* flush the ASCII column of the previous row */
			if (i != 0) {
				fprintf(f, "  %s\n", buff);
			}

			fprintf(f, "  %04zx ", i); // offset
		}

		fprintf(f, " %02x", pc[i]);

		// prepare ascii
		if ((pc[i] < 0x20) || (pc[i] > 0x7e)) {
			buff[i % 16] = '.';
		} else {
			buff[i % 16] = pc[i];
		}
		buff[(i % 16) + 1] = '\0';
	}

	// pad last (partial) line so the ascii column lines up
	while ((i % 16) != 0) {
		fprintf(f, "   ");
		i++;
	}

	fprintf(f, "  %s\n", buff);
}
1272 
/*
 * Fetch a variable-length sysctl into a freshly malloc'd buffer.
 *
 * On success returns 0; *buffer (caller frees) and *len describe the
 * result, with *buffer == NULL when the OID reported a zero length.
 * On failure returns an errno value with *buffer set to NULL.
 * Transient ENOMEM races (the object grew between the size probe and
 * the fetch) are retried up to RETRY_COUNT times.
 */
int
sysctl_buf(char *oid_name, void **buffer, size_t *len, void *newp,
    size_t newlen)
{
	int ret, err;
	int try = 0;

	*buffer = NULL;
#define RETRY_COUNT 10
try_again:
	ret = sysctlbyname(oid_name, NULL, len, newp, newlen);
	if (ret != 0) {
		/*
		 * sysctlbyname() returns -1 and sets errno; the original
		 * compared the return value against ENOMEM, which never
		 * matched, so the retry logic was dead code.
		 */
		if (errno == ENOMEM) {
			try++;
			if (try <= RETRY_COUNT) {
				goto try_again;
			}
		}
		err = errno;
		SKT_LOG("sysctl for len failed, %s\n", strerror(errno));
		return err;
	}
	if (*len == 0) {
		T_LOG("sysctl for len returned zero! No stats?\n");
		*buffer = NULL;
		return 0;
	}
	*buffer = malloc(*len);
	if (*buffer == NULL) {
		T_LOG("sysctl malloc for %zu bytes failed\n", *len);
		return ENOMEM;
	}

	ret = sysctlbyname(oid_name, *buffer, len, newp, newlen);
	if (ret != 0) {
		err = errno;
		if (errno == ENOMEM) {
			free(*buffer);
			*buffer = NULL;
			try++;
			if (try <= RETRY_COUNT) {
				goto try_again;
			}
		}
		SKT_LOG("sysctl for buf failed, %s\n", strerror(errno));
		/* don't hand the caller a dangling pointer to free again */
		free(*buffer);
		*buffer = NULL;
		return err;
	}

	return 0;
}
1324 
/*
 * Read — and, when 'mask' is non-NULL, set — the skywalk error
 * injection rmask sysctl.  Returns the previous value.
 */
uint32_t
sktu_set_inject_error_rmask(uint32_t *mask)
{
	uint32_t prev;
	size_t prev_size = sizeof(prev);
	int ret;

	ret = sysctlbyname("kern.skywalk.inject_error_rmask",
	    &prev, &prev_size, mask, (mask != NULL) ? sizeof(*mask) : 0);
	SKTC_ASSERT_ERR(!ret);
	return prev;
}
1338 
1339 /* returns TRUE if a matching IPv4 address is found */
1340 boolean_t
sktu_check_interface_ipv4_address(char * ifname,uint32_t ipaddr)1341 sktu_check_interface_ipv4_address(char *ifname, uint32_t ipaddr)
1342 {
1343 	struct ifaddrs *ifaddr, *ifa;
1344 	boolean_t match = FALSE;
1345 	int error;
1346 
1347 	error = getifaddrs(&ifaddr);
1348 	SKTC_ASSERT_ERR(!error);
1349 
1350 	for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
1351 		struct sockaddr_in *sin =
1352 		    (struct sockaddr_in *)(void *)ifa->ifa_addr;
1353 		if (ifa->ifa_addr == NULL) {
1354 			continue;
1355 		}
1356 		if ((strncmp(ifa->ifa_name, ifname, IFNAMSIZ) == 0) &&
1357 		    (ifa->ifa_addr->sa_family == AF_INET) &&
1358 		    (sin->sin_addr.s_addr == ipaddr)) {
1359 			match = TRUE;
1360 		}
1361 	}
1362 	freeifaddrs(ifaddr);
1363 	return match;
1364 }
1365 
1366 /****************************************************************/
1367 
1368 int
sktu_create_pfkeysock(void)1369 sktu_create_pfkeysock(void)
1370 {
1371 	int keysock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
1372 	assert(keysock != -1);
1373 	return keysock;
1374 }
1375 
/*
 * Install a minimal ESP security association for src -> dst on the
 * ipsec interface 'ifname' by sending a single SADB_ADD message on the
 * PF_KEY socket 'keysock'.  The SA uses NULL encryption and no
 * authentication (test-only), transport mode, SPI 'spi' (host byte
 * order on entry), and is deleted when the interface detaches.
 */
void
sktu_create_sa(int keysock, const char ifname[IFXNAMSIZ], uint32_t spi, struct in_addr *src, struct in_addr *dst)
{
	/*
	 *       <base, SA, (lifetime(HS),) address(SD), (address(P),)
	 *       key(AE), (identity(SD),) (sensitivity)>
	 */

	/* complete SADB_ADD message, each extension 64-bit aligned as
	 * PF_KEY requires */
	struct {
		struct sadb_msg msg __attribute((aligned(sizeof(uint64_t))));
		struct sadb_key key      __attribute((aligned(sizeof(uint64_t))));
		struct sadb_sa sa        __attribute((aligned(sizeof(uint64_t))));
		struct sadb_x_sa2 sa2    __attribute((aligned(sizeof(uint64_t))));
		struct sadb_x_ipsecif ipsecif __attribute((aligned(sizeof(uint64_t))));
		struct {
			struct sadb_address addr __attribute((aligned(sizeof(uint64_t))));
			struct sockaddr_in saddr __attribute((aligned(sizeof(uint64_t))));
		} src;
		struct {
			struct sadb_address addr __attribute((aligned(sizeof(uint64_t))));
			struct sockaddr_in saddr __attribute((aligned(sizeof(uint64_t))));
		} dst;
	} addcmd;

	memset(&addcmd, 0, sizeof(addcmd));

	/* base header; lengths throughout are in 64-bit units */
	addcmd.msg.sadb_msg_version = PF_KEY_V2;
	addcmd.msg.sadb_msg_type = SADB_ADD;
	addcmd.msg.sadb_msg_errno = 0;
	addcmd.msg.sadb_msg_satype = SADB_SATYPE_ESP;
	addcmd.msg.sadb_msg_len = PFKEY_UNIT64(sizeof(addcmd));
	addcmd.msg.sadb_msg_reserved = 0;
	addcmd.msg.sadb_msg_seq = 0;
	addcmd.msg.sadb_msg_pid = (unsigned)getpid();

	/* zero-length encryption key (NULL cipher needs no key bits) */
	addcmd.key.sadb_key_len = PFKEY_UNIT64(sizeof(addcmd.key));
	addcmd.key.sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
	addcmd.key.sadb_key_bits = 0;
	addcmd.key.sadb_key_reserved = 0;

	/* SA proper: SPI on the wire is network byte order */
	addcmd.sa.sadb_sa_len = PFKEY_UNIT64(sizeof(addcmd.sa));
	addcmd.sa.sadb_sa_exttype = SADB_EXT_SA;
	addcmd.sa.sadb_sa_spi = htonl(spi);
	addcmd.sa.sadb_sa_replay = 0;
	addcmd.sa.sadb_sa_state = 0;
	addcmd.sa.sadb_sa_auth = SADB_AALG_NONE;
	addcmd.sa.sadb_sa_encrypt = SADB_EALG_NULL;
	addcmd.sa.sadb_sa_flags = 0;

	/* transport mode, torn down when the ipsec interface detaches */
	addcmd.sa2.sadb_x_sa2_len = PFKEY_UNIT64(sizeof(addcmd.sa2));
	addcmd.sa2.sadb_x_sa2_exttype = SADB_X_EXT_SA2;
	addcmd.sa2.sadb_x_sa2_mode = IPSEC_MODE_TRANSPORT;
	addcmd.sa2.sadb_x_sa2_alwaysexpire = 1;
	addcmd.sa2.sadb_x_sa2_flags = SADB_X_EXT_SA2_DELETE_ON_DETACH;
	addcmd.sa2.sadb_x_sa2_sequence = 0;
	addcmd.sa2.sadb_x_sa2_reqid = 0;

	/* bind the SA to the named ipsec interface only */
	addcmd.ipsecif.sadb_x_ipsecif_len = PFKEY_UNIT64(sizeof(addcmd.ipsecif));
	addcmd.ipsecif.sadb_x_ipsecif_exttype = SADB_X_EXT_IPSECIF;
	memset(addcmd.ipsecif.sadb_x_ipsecif_internal_if, 0, sizeof(addcmd.ipsecif.sadb_x_ipsecif_internal_if));
	memset(addcmd.ipsecif.sadb_x_ipsecif_outgoing_if, 0, sizeof(addcmd.ipsecif.sadb_x_ipsecif_outgoing_if));
	strlcpy(addcmd.ipsecif.sadb_x_ipsecif_ipsec_if, ifname, sizeof(addcmd.ipsecif.sadb_x_ipsecif_ipsec_if));
	addcmd.ipsecif.sadb_x_ipsecif_init_disabled = 0;
	addcmd.ipsecif.reserved = 0;

	/* source endpoint; prefixlen is the full address width in bits */
	addcmd.src.addr.sadb_address_len = PFKEY_UNIT64(sizeof(addcmd.src));
	addcmd.src.addr.sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
	addcmd.src.addr.sadb_address_proto = IPSEC_ULPROTO_ANY;
	addcmd.src.addr.sadb_address_prefixlen = sizeof(struct in_addr) << 3; //XXX Why?
	addcmd.src.addr.sadb_address_reserved = 0;
	addcmd.src.saddr.sin_len = sizeof(addcmd.src.saddr);
	addcmd.src.saddr.sin_family = AF_INET;
	addcmd.src.saddr.sin_port = htons(0);
	addcmd.src.saddr.sin_addr = *src;

	/* destination endpoint */
	addcmd.dst.addr.sadb_address_len = PFKEY_UNIT64(sizeof(addcmd.dst));
	addcmd.dst.addr.sadb_address_exttype = SADB_EXT_ADDRESS_DST;
	addcmd.dst.addr.sadb_address_proto = IPSEC_ULPROTO_ANY;
	addcmd.dst.addr.sadb_address_prefixlen = sizeof(struct in_addr) << 3; //XXX Why?
	addcmd.dst.addr.sadb_address_reserved = 0;
	addcmd.dst.saddr.sin_len = sizeof(addcmd.dst.saddr);
	addcmd.dst.saddr.sin_family = AF_INET;
	addcmd.dst.saddr.sin_port = htons(0);
	addcmd.dst.saddr.sin_addr = *dst;

	//log_hexdump(&addcmd, sizeof(addcmd));

	/* the whole message must go out in one datagram */
	ssize_t slen;
	slen = send(keysock, &addcmd, sizeof(addcmd), 0);
	assert(slen == sizeof(addcmd));
}
1467 
/* Overlay of a 16-bit value and its two bytes (checksum odd-byte pad). */
typedef union {
	char        c[2];
	u_short     s;
} short_union_t;

/* Overlay of a long and two 16-bit halves used by reduce().
 * NOTE(review): assumes s[0]/s[1] alias the LOW 32 bits of 'l', i.e. a
 * little-endian layout — confirm before building for big-endian. */
typedef union {
	u_short     s[2];
	long        l;
} long_union_t;
1477 
/*
 * Fold the 32-bit ones-complement accumulator '*sum' back into 16 bits.
 * NOTE(review): relies on long_union_t's s[0]/s[1] aliasing the low
 * halves of 'l' (little-endian layout) — verify on other targets.
 */
static __inline__ void
reduce(int * sum)
{
	long_union_t l_util;

	l_util.l = *sum;
	/* add the two 16-bit halves; subtracting 65535 is the
	 * end-around carry of ones-complement addition */
	*sum = l_util.s[0] + l_util.s[1];
	if (*sum > 65535) {
		*sum -= 65535;
	}
	return;
}
1490 
/*
 * Internet checksum (RFC 1071 style): ones-complement sum of 'len'
 * bytes at 'pkt', seeded with 'sum0'; returns the complemented 16-bit
 * result.  NOTE(review): the buffer is read as u_short, so 'pkt' is
 * assumed to be at least 2-byte aligned — confirm for odd offsets.
 */
unsigned short
in_cksum(void * pkt, int len, int sum0)
{
	u_short * w;
	int sum = sum0;

	w = (u_short *)pkt;
	/* unrolled: 32 bytes (16 words) per iteration */
	while ((len -= 32) >= 0) {
		sum += w[0]; sum += w[1];
		sum += w[2]; sum += w[3];
		sum += w[4]; sum += w[5];
		sum += w[6]; sum += w[7];
		sum += w[8]; sum += w[9];
		sum += w[10]; sum += w[11];
		sum += w[12]; sum += w[13];
		sum += w[14]; sum += w[15];
		w += 16;
	}
	len += 32;
	/* then 8-byte chunks */
	while ((len -= 8) >= 0) {
		sum += w[0]; sum += w[1];
		sum += w[2]; sum += w[3];
		w += 4;
	}
	len += 8;
	if (len) {
		/* fold before the tail so the int accumulator can't overflow */
		reduce(&sum);
		while ((len -= 2) >= 0) {
			sum += *w++;
		}
	}
	if (len == -1) { /* odd-length packet */
		short_union_t s_util;

		/* pad the final lone byte with a zero byte */
		s_util.s = 0;
		s_util.c[0] = *((char *)w);
		s_util.c[1] = 0;
		sum += s_util.s;
	}
	reduce(&sum);
	return ~sum & 0xffff;
}
1533 
/* Fold all carry bits above bit 15 back into the low 16 bits. */
#define ADDCARRY(_x)  do {                                              \
	while (((_x) >> 16) != 0)                                       \
	        (_x) = ((_x) >> 16) + ((_x) & 0xffff);                  \
} while (0)

/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */
/* Fold a 64-bit ones-complement accumulator ('sum', with q_util/l_util
 * unions in scope) down to 16 bits. */
#define REDUCE16 {                                                        \
	q_util.q = sum;                                                   \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];                                  \
	ADDCARRY(sum);                                                    \
}

/* 32-bit value viewed as two 16-bit halves (for REDUCE16) */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* 64-bit value viewed as four 16-bit or two 32-bit pieces (for REDUCE16) */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};
1562 
/*
 * IPv4 pseudo-header checksum helper: 16-bit ones-complement sum of
 * three 32-bit quantities (typically source address, destination
 * address, and htonl(proto + length), all in network byte order).
 */
uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	uint64_t sum;
	union q_util q_util;
	union l_util l_util;

	/* 64-bit accumulator: the three 32-bit adds cannot overflow */
	sum = (uint64_t)a + b + c;
	REDUCE16;
	return sum;
}
1574 
/*
 * IPv6 pseudo-header checksum helper: the 16-bit ones-complement sum
 * of the source address, destination address and the caller-supplied
 * 32-bit value 'x'.
 *
 * 'x' could be one of:
 *
 *	htonl(proto + length), or
 *	htonl(proto + length + sum)
 */
uint16_t
in6_pseudo(const struct in6_addr *src, const struct in6_addr *dst, uint32_t x)
{
	const uint16_t *words;
	uint32_t acc = x;
	int i;

	/* sum the eight 16-bit words of each 128-bit address */
	words = (const uint16_t *)src;
	for (i = 0; i < 8; i++) {
		acc += words[i];
	}
	words = (const uint16_t *)dst;
	for (i = 0; i < 8; i++) {
		acc += words[i];
	}

	/* fold in carry bits (end-around carry) */
	while ((acc >> 16) != 0) {
		acc = (acc >> 16) + (acc & 0xffff);
	}

	return acc;
}
1610 
/*
 * Return a monotonically increasing IPv4 identification value,
 * wrapping at 2^16.  The counter is unsigned: the original signed
 * 'static int' would eventually hit signed-overflow UB after INT_MAX
 * calls, while the observable 16-bit sequence is identical.
 * Not thread-safe; fine for single-threaded test use.
 */
uint16_t
sktu_ip_id(void)
{
	static uint16_t next_id;

	return next_id++;
}
1617 
1618 void
sktu_channel_port_init(channel_port_t ch_port,uuid_t instance,nexus_port_t nx_port,bool enable_upp,bool enable_event_ring,bool low_latency)1619 sktu_channel_port_init(channel_port_t ch_port, uuid_t instance,
1620     nexus_port_t nx_port, bool enable_upp, bool enable_event_ring,
1621     bool low_latency)
1622 {
1623 	channel_t       chan;
1624 	nexus_port_t    port = nx_port;
1625 	ring_id_t       ringid;
1626 
1627 	bzero(ch_port, sizeof(*ch_port));
1628 	chan = sktu_channel_create_extended(instance, port,
1629 	    CHANNEL_DIR_TX_RX, CHANNEL_RING_ID_ANY, NULL,
1630 	    -1, -1, -1, -1, -1, enable_upp ? 1 : -1, 1,
1631 	    enable_event_ring ? 1 : -1, low_latency ? 1 : -1);
1632 	if (chan == NULL) {
1633 		SKT_LOG("Can't open channel on port %d, %s\n", port,
1634 		    strerror(errno));
1635 		return;
1636 	}
1637 
1638 	T_LOG("Opened port %d\n", port);
1639 
1640 	ch_port->chan = chan;
1641 	ch_port->fd = os_channel_get_fd(chan);
1642 	ch_port->port = port;
1643 	ch_port->user_packet_pool = enable_upp;
1644 
1645 	/* tx ring */
1646 	ringid = os_channel_ring_id(chan, CHANNEL_FIRST_TX_RING);
1647 	ch_port->tx_ring = os_channel_tx_ring(ch_port->chan, ringid);
1648 	assert(ch_port->tx_ring != NULL);
1649 	/* rx ring */
1650 	ringid = os_channel_ring_id(chan, CHANNEL_FIRST_RX_RING);
1651 	ch_port->rx_ring = os_channel_rx_ring(ch_port->chan, ringid);
1652 	assert(ch_port->rx_ring != NULL);
1653 }
1654 
/*
 * Fold a 32-bit ones-complement accumulator down to 16 bits and return
 * its complement.
 */
static inline uint16_t
sktu_fold_sum_final(uint32_t sum)
{
	/* three folds absorb every possible carry out of the low word */
	uint32_t folded = (sum >> 16) + (sum & 0xffff);
	folded = (folded >> 16) + (folded & 0xffff);
	folded = (folded >> 16) + (folded & 0xffff);
	return (uint16_t)(~folded & 0xffff);
}
1663 
/*
 * Copy 'frame' into a packet freshly allocated from the port's user
 * packet pool (requires user_packet_pool), spanning as many buflets as
 * the frame needs.  Checksum offload metadata and the flow UUID are
 * propagated from the frame; the finalized packet is returned.
 */
packet_t
sktu_channel_port_frame_to_pkt(channel_port_t port, struct sktu_frame *frame)
{
	int error;
	packet_t pkt;
	void *baddr, *bytes = &frame->bytes[0];
	size_t len = frame->len;
	buflet_t buf, pbuf = NULL;	/* pbuf: previously-filled buflet */
	uint16_t clen, bdlim, blen, bcnt;

	assert(port->user_packet_pool);

	error = os_channel_packet_alloc(port->chan, &pkt);
	SKTC_ASSERT_ERR(error == 0);
	assert(pkt != 0);

	/* first buflet; its data limit sets the per-buflet capacity */
	buf = os_packet_get_next_buflet(pkt, NULL);
	assert(buf != NULL);
	error = os_buflet_set_data_offset(buf, 0);
	SKTC_ASSERT_ERR(error == 0);
	bdlim = blen = os_buflet_get_data_limit(buf);
	assert(bdlim != 0);
	bcnt = os_packet_get_buflet_count(pkt);
	/* the packet's buflets must be able to hold the whole frame */
	assert(blen * bcnt >= len);
	baddr = os_buflet_get_object_address(buf);
	assert(baddr != NULL);

	error = os_packet_set_link_header_length(pkt, 0);
	SKTC_ASSERT_ERR(error == 0);

	/* copy the frame bytes, walking buflets as each one fills up.
	 * NOTE: 'bytes'/'baddr' arithmetic on void* is a GCC/clang
	 * extension (treated as char*). */
	while (len != 0) {
		if (blen == 0) {
			/* current buflet full: mark it and advance */
			error = os_buflet_set_data_length(buf, bdlim);
			SKTC_ASSERT_ERR(error == 0);
			pbuf = buf;
			buf = os_packet_get_next_buflet(pkt, pbuf);
			assert(buf != NULL);
			error = os_buflet_set_data_offset(buf, 0);
			SKTC_ASSERT_ERR(error == 0);
			baddr = os_buflet_get_object_address(buf);
			assert(baddr != NULL);
			bdlim = blen = os_buflet_get_data_limit(buf);
		}
		clen = MIN(blen, len);
		memcpy(baddr, bytes, clen);
		len -= clen;
		blen -= clen;
		bytes += clen;
		baddr += clen;
		assert(len == 0 || blen == 0);
	}
	if (frame->csum_flags != 0) {
		os_packet_set_inet_checksum(pkt, frame->csum_flags,
		    frame->csum_start, frame->csum_stuff);
	}
	/* last buflet length: whole frame if it fit in one buflet,
	 * otherwise the final partial copy length */
	if (pbuf == NULL) {
		error = os_buflet_set_data_length(buf, frame->len);
	} else {
		error = os_buflet_set_data_length(buf, clen);
	}
	SKTC_ASSERT_ERR(error == 0);

	os_packet_set_flow_uuid(pkt, frame->flow_uuid);
	error = os_packet_finalize(pkt);
	SKTC_ASSERT_ERR(error == 0);
	return pkt;
}
1732 
1733 int
sktu_channel_port_tx(channel_port_t port,packet_t pkt)1734 sktu_channel_port_tx(channel_port_t port, packet_t pkt)
1735 {
1736 	int error;
1737 	slot_prop_t prop;
1738 	channel_slot_t slot;
1739 
1740 	slot = os_channel_get_next_slot(port->tx_ring, NULL, &prop);
1741 	if (slot == NULL) {
1742 		return ENOENT;
1743 	}
1744 	error = os_channel_slot_attach_packet(port->tx_ring, slot, pkt);
1745 	SKTC_ASSERT_ERR(error == 0);
1746 	error = os_channel_advance_slot(port->tx_ring, slot);
1747 	SKTC_ASSERT_ERR(error == 0);
1748 	return 0;
1749 }
1750 
1751 /*
1752  * Burst Tx tries to tx as many it can in one shot.
1753  *
1754  * Returns number of actually completed Tx.
1755  */
1756 uint32_t
sktu_channel_port_tx_burst_pkt(channel_port_t port,packet_t * pkts,uint32_t n)1757 sktu_channel_port_tx_burst_pkt(channel_port_t port, packet_t *pkts,
1758     uint32_t n)
1759 {
1760 	struct timespec timeout = {
1761 		.tv_sec = 10,
1762 		.tv_nsec = 0,
1763 	};
1764 	struct kevent evlist, kev;
1765 	int kq;
1766 	int error;
1767 	uint32_t i;
1768 
1769 	kq = kqueue();
1770 	assert(kq != -1);
1771 
1772 	EV_SET(&kev, port->fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
1773 	error = kevent(kq, &kev, 1, NULL, 0, NULL);
1774 	SKTC_ASSERT_ERR(error == 0);
1775 
1776 	/* wait for Tx to become available */
1777 	error = kevent(kq, NULL, 0, &evlist, 1, &timeout);
1778 	if (error <= 0) {
1779 		if (errno == EAGAIN) {
1780 			return 0;
1781 		}
1782 		SKTC_ASSERT_ERR(error == 0);
1783 	}
1784 	if (error == 0) {
1785 		T_LOG("kevent timeout\n");
1786 		return 0;
1787 	}
1788 	if (evlist.flags & EV_ERROR) {
1789 		int err = evlist.data;
1790 		if (err == EAGAIN) {
1791 			return 0;
1792 		}
1793 		SKTC_ASSERT_ERR(err == 0);
1794 	}
1795 
1796 	if (evlist.filter != EVFILT_WRITE) {
1797 		err(EX_OSERR, "%lu event %d?\n", evlist.ident, evlist.filter);
1798 	}
1799 
1800 	for (i = 0; i < n; i++) {
1801 		error = sktu_channel_port_tx(port, pkts[i]);
1802 		if (error != 0) {
1803 			break;
1804 		}
1805 	}
1806 
1807 	if (i != 0) {
1808 		error = os_channel_sync(port->chan, CHANNEL_SYNC_TX);
1809 		SKTC_ASSERT_ERR(error == 0);
1810 	}
1811 
1812 	return i;
1813 }
1814 
1815 /*
1816  * Burst Tx tries to tx as many it can in one shot.
1817  *
1818  * Returns number of actually completed Tx.
1819  */
1820 uint32_t
sktu_channel_port_tx_burst(channel_port_t port,struct sktu_frame ** frames,uint32_t n)1821 sktu_channel_port_tx_burst(channel_port_t port, struct sktu_frame **frames,
1822     uint32_t n)
1823 {
1824 	struct timespec timeout = {
1825 		.tv_sec = 10,
1826 		.tv_nsec = 0,
1827 	};
1828 	struct kevent evlist, kev;
1829 	int kq;
1830 	int error;
1831 	uint32_t i;
1832 	packet_t pkt;
1833 
1834 	kq = kqueue();
1835 	assert(kq != -1);
1836 
1837 	EV_SET(&kev, port->fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
1838 	error = kevent(kq, &kev, 1, NULL, 0, NULL);
1839 	SKTC_ASSERT_ERR(error == 0);
1840 
1841 	/* wait for Tx to become available */
1842 	error = kevent(kq, NULL, 0, &evlist, 1, &timeout);
1843 	if (error <= 0) {
1844 		if (errno == EAGAIN) {
1845 			return 0;
1846 		}
1847 		SKTC_ASSERT_ERR(error == 0);
1848 	}
1849 	if (error == 0) {
1850 		T_LOG("kevent timeout\n");
1851 		return 0;
1852 	}
1853 	if (evlist.flags & EV_ERROR) {
1854 		int err = evlist.data;
1855 		if (err == EAGAIN) {
1856 			return 0;
1857 		}
1858 		SKTC_ASSERT_ERR(err == 0);
1859 	}
1860 
1861 	if (evlist.filter != EVFILT_WRITE) {
1862 		err(EX_OSERR, "%lu event %d?\n", evlist.ident, evlist.filter);
1863 	}
1864 
1865 	for (i = 0; i < n; i++) {
1866 		pkt = sktu_channel_port_frame_to_pkt(port, frames[i]);
1867 		error = sktu_channel_port_tx(port, pkt);
1868 		if (error != 0) {
1869 			break;
1870 		}
1871 	}
1872 
1873 	if (i != 0) {
1874 		error = os_channel_sync(port->chan, CHANNEL_SYNC_TX);
1875 		SKTC_ASSERT_ERR(error == 0);
1876 	}
1877 
1878 	return i;
1879 }
1880 
1881 /*
1882  * Bulk Tx makes sure all Tx operations are completed; otherwise fails the test.
1883  */
1884 void
sktu_channel_port_tx_bulk(channel_port_t port,struct sktu_frame ** frames,uint32_t n)1885 sktu_channel_port_tx_bulk(channel_port_t port, struct sktu_frame **frames,
1886     uint32_t n)
1887 {
1888 	uint32_t ret = 0;
1889 	ret = sktu_channel_port_tx_burst(port, frames, n);
1890 	assert(ret < n);
1891 	if (ret != n) {
1892 		errx(EX_OSERR, "tx bulk failed %u/%u", n, ret);
1893 	}
1894 }
1895 
/*
 * Validate the IPv4 frame in 'frame' (length and header checksum) and,
 * when 'ip_payload' is non-NULL, copy the IP payload out; the payload
 * length is returned via 'ip_payload_len'.  Returns 0 on success;
 * aborts the test on checksum failure.
 * NOTE(review): uses sizeof(struct ip) as the header length, i.e.
 * assumes no IP options — confirm for callers that craft options.
 */
int
sktu_parse_ipv4_frame(struct sktu_frame *frame, void *ip_payload,
    uint32_t *ip_payload_len)
{
	size_t pkt_len, payload_len;
	void *buf;
	struct ip *ip;
	uint16_t csum;

	buf = &frame->bytes[0];
	ip = (struct ip*)buf;
	pkt_len = frame->len;
	/* frame must be exactly one IP datagram */
	assert(pkt_len == ntohs(ip->ip_len));
	payload_len = pkt_len - sizeof(*ip);
	assert(payload_len <= SKTU_FRAME_BUF_SIZE);

	/* verify ip header checksum (sums to zero when valid) */
	csum = in_cksum(ip, sizeof(*ip), 0);
	if (csum != 0) {
		sktu_dump_buffer(stderr, __func__, buf, pkt_len);
		errx(EX_PROTOCOL, "IP header checksum invalid");
	}

	if (ip_payload != NULL) {     /* copy the data */
		/* void* arithmetic is a GCC/clang extension */
		memcpy(ip_payload, buf + sizeof(*ip), pkt_len - sizeof(*ip));
	}

	*ip_payload_len = payload_len;
	return 0;
}
1926 
/*
 * Validate a TCP/IPv4 frame (protocol, length, IP header checksum and
 * TCP checksum incl. pseudo-header) and, when 'tcp_payload' is
 * non-NULL, copy the TCP payload out; its length is returned via
 * 'tcp_payload_len'.  Returns 0 on success, EINVAL for non-TCP, -1 for
 * a bad TCP checksum; aborts on a bad IP header checksum.
 * NOTE(review): sizeof(ip_tcp_header_t) implies no IP or TCP options —
 * confirm for callers that use options.
 */
int
sktu_parse_tcp4_frame(struct sktu_frame *frame, void *tcp_payload,
    uint32_t *tcp_payload_len)
{
	uint32_t pkt_len, payload_len;
	void *buf;
	struct ip *ip;
	ip_tcp_header_t *ip_tcp;
	uint16_t csum;

	buf = &frame->bytes[0];
	ip = buf;
	ip_tcp = buf;
	pkt_len = frame->len;
	if (ip->ip_p != IPPROTO_TCP) {
		sktu_dump_buffer(stderr, "non-TCP packet", buf, pkt_len);
		return EINVAL;
	}
	assert(pkt_len == ntohs(ip_tcp->ip.ip_len));
	payload_len = pkt_len - sizeof(ip_tcp_header_t);
	assert(payload_len <= SKTU_FRAME_BUF_SIZE);

	/* IP header checksum sums to zero when valid */
	csum = in_cksum(ip, sizeof(*ip), 0);
	if (csum != 0) {
		sktu_dump_buffer(stderr, __func__, buf, pkt_len);
		errx(EX_PROTOCOL, "IP header checksum invalid");
	}

	/* TCP checksum: segment sum plus pseudo-header (addresses,
	 * proto and length folded in via the 32-bit in_pseudo arg);
	 * a valid packet yields 0xffff before the final complement */
	csum = os_inet_checksum(&ip_tcp->tcp, pkt_len - sizeof(struct ip), 0);
	csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    csum + htonl(payload_len + sizeof(struct tcphdr) + IPPROTO_TCP));
	csum ^= 0xffff;
	if (csum != 0) {
		sktu_dump_buffer(stderr, "invalid TCP csum", buf, pkt_len);
		return -1;
	}

	if (tcp_payload != NULL) {     /* copy the data */
		memcpy(tcp_payload, buf + sizeof(*ip_tcp), payload_len);
	}

	*tcp_payload_len = payload_len;

	return 0;
}
1972 
/*
 * Validate a UDP/IPv4 frame (protocol, length, IP header checksum and
 * — when non-zero — the UDP checksum incl. pseudo-header) and, when
 * 'udp_payload' is non-NULL, copy the UDP payload out; its length is
 * returned via 'udp_payload_len'.  Returns 0 on success, EINVAL for
 * non-UDP, -1 for a bad UDP checksum; aborts on a bad IP checksum.
 * NOTE(review): assumes no IP options (sizeof(ip_udp_header_t)).
 */
int
sktu_parse_udp4_frame(struct sktu_frame *frame, void *udp_payload,
    uint32_t *udp_payload_len)
{
	size_t pkt_len, payload_len;
	void *buf;
	struct ip *ip;
	ip_udp_header_t *ip_udp;
	uint16_t csum;

	buf = &frame->bytes[0];
	ip = buf;
	ip_udp = buf;
	pkt_len = frame->len;
	if (ip->ip_p != IPPROTO_UDP) {
		sktu_dump_buffer(stderr,
		    "sktu_parse_udp4_frame: non-UDP packet", buf, pkt_len);
		return EINVAL;
	}
	assert(pkt_len == ntohs(ip_udp->ip.ip_len));
	payload_len = pkt_len - sizeof(ip_udp_header_t);
	assert(payload_len <= SKTU_FRAME_BUF_SIZE);

	/* IP header checksum sums to zero when valid */
	csum = in_cksum(ip, sizeof(*ip), 0);
	if (csum != 0) {
		sktu_dump_buffer(stderr, __func__, buf, pkt_len);
		errx(EX_PROTOCOL, "IP header checksum invalid");
	}

	/* a zero UDP checksum means "not computed" for IPv4 */
	if (ip_udp->udp.uh_sum == 0) {
		goto skip_udp_checksum;
	}

	/* NOTE(review): this += on a uint16_t drops the end-around
	 * carry of the ones-complement addition; the TCP path instead
	 * folds the addend inside in_pseudo's 32-bit argument.  Could
	 * be off-by-one for some packets — confirm and align with the
	 * TCP variant. */
	csum = os_inet_checksum(&ip_udp->udp, pkt_len - sizeof(struct ip), 0);
	csum += htons(payload_len + sizeof(struct udphdr) + IPPROTO_UDP);
	csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, csum);
	csum ^= 0xffff;
	if (csum != 0) {
		sktu_dump_buffer(stderr, __func__, buf, pkt_len);
		return -1;
	}

skip_udp_checksum:
	if (udp_payload != NULL) {
		memcpy(udp_payload, buf + sizeof(*ip_udp), payload_len);
	}

	*udp_payload_len = payload_len;

	return 0;
}
2024 
2025 /*
2026  * Rx once from an available ring;
2027  * Return 0, if successful; non-zero, otherwise.
2028  */
2029 struct sktu_frame *
sktu_channel_port_rx(channel_port_t port)2030 sktu_channel_port_rx(channel_port_t port)
2031 {
2032 	int error;
2033 	slot_prop_t prop;
2034 	channel_slot_t slot;
2035 	struct sktu_frame *frame;
2036 	packet_t pkt;
2037 	void *addr, *buf;
2038 	size_t buf_len;
2039 	size_t frame_length;
2040 	buflet_t buflet;
2041 
2042 	slot = os_channel_get_next_slot(port->rx_ring, NULL, &prop);
2043 	if (slot == NULL) {
2044 		return NULL;
2045 	}
2046 	assert(prop.sp_buf_ptr != 0);
2047 
2048 	frame = sktu_frame_alloc();
2049 
2050 	pkt = os_channel_slot_get_packet(port->rx_ring, slot);
2051 	assert(pkt != 0);
2052 	if (port->user_packet_pool) {
2053 		error = os_channel_slot_detach_packet(port->rx_ring,
2054 		    slot, pkt);
2055 		SKTC_ASSERT_ERR(error == 0);
2056 	}
2057 
2058 	buflet = os_packet_get_next_buflet(pkt, NULL);
2059 	assert(buflet != NULL);
2060 	buf = os_buflet_get_object_address(buflet) +
2061 	    os_buflet_get_data_offset(buflet);
2062 	frame_length = os_packet_get_data_length(pkt);
2063 
2064 	buflet = os_packet_get_next_buflet(pkt, NULL);
2065 	assert(buflet != NULL);
2066 	buf = os_buflet_get_object_address(buflet) +
2067 	    os_buflet_get_data_offset(buflet);
2068 	buf_len = os_buflet_get_data_length(buflet);
2069 	assert(buf_len < SKTU_FRAME_BUF_SIZE);
2070 
2071 	frame->len = os_packet_get_data_length(pkt);
2072 
2073 	addr = &frame->bytes[0];
2074 	memcpy(addr, buf, buf_len);
2075 	frame_length -= buf_len;
2076 
2077 	while (frame_length != 0) {
2078 		buflet = os_packet_get_next_buflet(pkt, buflet);
2079 		assert(buflet != NULL);
2080 		buf = os_buflet_get_object_address(buflet) +
2081 		    os_buflet_get_data_offset(buflet);
2082 		assert(buf != 0);
2083 		buf_len = os_buflet_get_data_length(buflet);
2084 		assert(buf_len != 0);
2085 		memcpy(addr, buf, buf_len);
2086 		addr += buf_len;
2087 		frame_length -= buf_len;
2088 	}
2089 
2090 	os_packet_get_flow_uuid(pkt, &frame->flow_uuid);
2091 	error = os_channel_packet_free(port->chan, pkt);
2092 
2093 	error = os_channel_advance_slot(port->rx_ring, slot);
2094 	SKTC_ASSERT_ERR(error == 0);
2095 
2096 	return frame;
2097 }
2098 
/*
 * Wait (up to 10 seconds) for the channel fd to become readable, then
 * receive up to n frames into frames[].
 *
 * Returns the number of frames received; 0 on timeout or EAGAIN.
 * The original leaked the kqueue fd on every early-return path; all
 * exits now funnel through a single close().
 */
uint32_t
sktu_channel_port_rx_burst(channel_port_t port, struct sktu_frame **frames,
    uint32_t n)
{
	struct timespec timeout = {
		.tv_sec = 10,
		.tv_nsec = 0,
	};

	int error;
	struct kevent evlist, kev;
	int kq;
	uint32_t i = 0;

	kq = kqueue();
	assert(kq != -1);

	EV_SET(&kev, port->fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
	error = kevent(kq, &kev, 1, NULL, 0, NULL);
	SKTC_ASSERT_ERR(error == 0);

	/* wait for RX to become available */
	error = kevent(kq, NULL, 0, &evlist, 1, &timeout);
	if (error <= 0) {
		if (errno == EAGAIN) {
			goto done;	/* i == 0 */
		}
		SKTC_ASSERT_ERR(error == 0);
	}
	if (error == 0) {
		T_LOG("kevent timeout\n");
		goto done;
	}
	if (evlist.flags & EV_ERROR) {
		/* renamed from "err" to avoid shadowing err(3) below */
		int kev_err = evlist.data;
		if (kev_err == EAGAIN) {
			goto done;
		}
		SKTC_ASSERT_ERR(kev_err == 0);
	}

	if (evlist.filter != EVFILT_READ) {
		err(EX_OSERR, "%lu event %d?\n", evlist.ident, evlist.filter);
	}

	for (i = 0; i < n; i++) {
		frames[i] = sktu_channel_port_rx(port);
		if (frames[i] == NULL) {
			break;
		}
	}

	if (i != 0) {
		error = os_channel_sync(port->chan, CHANNEL_SYNC_RX);
		SKTC_ASSERT_ERR(error == 0);
	}

done:
	close(kq);
	return i;
}
2160 
2161 void
sktu_channel_port_rx_bulk(channel_port_t port,struct sktu_frame ** frames,uint32_t n)2162 sktu_channel_port_rx_bulk(channel_port_t port, struct sktu_frame **frames,
2163     uint32_t n)
2164 {
2165 	uint32_t ret = 0;
2166 	ret = sktu_channel_port_rx_burst(port, frames, n);
2167 	assert(ret < n);
2168 	if (ret != n) {
2169 		errx(EX_OSERR, "rx bulk failed, %u/%u packets", n, ret);
2170 	}
2171 }
2172 
2173 /*
2174  * Received batch of frames from utun file descriptor.
2175  *
2176  * Returns number of frames actually received.
2177  */
2178 uint32_t
sktu_utun_fd_rx_burst(int utun_fd,struct sktu_frame ** frames,uint32_t n)2179 sktu_utun_fd_rx_burst(int utun_fd, struct sktu_frame **frames, uint32_t n)
2180 {
2181 	struct timeval timeout = {
2182 		.tv_sec = 10,
2183 		.tv_usec = 0,
2184 	};
2185 
2186 	fd_set readfds, errorfds;
2187 	int retval;
2188 
2189 	FD_ZERO(&readfds);
2190 	FD_ZERO(&errorfds);
2191 	FD_SET(utun_fd, &readfds);
2192 	FD_SET(utun_fd, &errorfds);
2193 
2194 	retval = select(utun_fd + 1, &readfds, NULL, &errorfds, &timeout);
2195 	if (retval == -1) {
2196 		err(EX_OSERR, "select()");
2197 	}
2198 
2199 	if (!FD_ISSET(utun_fd, &readfds) && retval == 0) { // timeout
2200 		T_LOG("recv timeout\n");
2201 		return 0;
2202 	}
2203 	assert(!FD_ISSET(utun_fd, &errorfds));
2204 	assert(retval == 1);
2205 
2206 	if (!FD_ISSET(utun_fd, &readfds)) {
2207 		errx(EX_OSERR, "fd selected but no read fd available");
2208 	}
2209 
2210 	uint32_t i = 0;
2211 	for (i = 0; i < n; i++) {
2212 		struct {
2213 			uint32_t af;
2214 			char bytes[SKTU_FRAME_BUF_SIZE];
2215 		} utun_packet;
2216 		ssize_t len;
2217 		len = read(utun_fd, &utun_packet, sizeof(utun_packet));
2218 		if (len < 1) {
2219 			errx(EX_OSERR, "utun read 0 len");
2220 		}
2221 		struct sktu_frame *frame = frames[i] = sktu_frame_alloc();
2222 		memcpy(frame->bytes, &utun_packet.bytes, len - sizeof(uint32_t));
2223 		frame->len = len - sizeof(uint32_t);
2224 	}
2225 
2226 	return i;
2227 }
2228 
2229 void
sktu_utun_fd_tx_burst(int utun_fd,struct sktu_frame ** frames,uint32_t n)2230 sktu_utun_fd_tx_burst(int utun_fd, struct sktu_frame **frames, uint32_t n)
2231 {
2232 	struct timeval timeout = {
2233 		.tv_sec = 10,
2234 		.tv_usec = 0,
2235 	};
2236 	fd_set writefds, errorfds;
2237 	int retval;
2238 
2239 	FD_ZERO(&writefds);
2240 	FD_ZERO(&errorfds);
2241 	FD_SET(utun_fd, &writefds);
2242 	FD_SET(utun_fd, &errorfds);
2243 
2244 	retval = select(utun_fd + 1, NULL, &writefds, &errorfds, &timeout);
2245 	if (retval == -1) {
2246 		err(EX_OSERR, "select()");
2247 	}
2248 
2249 	if (!FD_ISSET(utun_fd, &writefds) && retval == 0) { // timeout
2250 		err(EX_OSERR, "recv timeout\n");
2251 	}
2252 
2253 	assert(!FD_ISSET(utun_fd, &errorfds));
2254 	assert(retval == 1);
2255 
2256 	if (!FD_ISSET(utun_fd, &writefds)) {
2257 		errx(EX_OSERR, "fd selected but no write fd available");
2258 	}
2259 
2260 	uint32_t i = 0;
2261 	for (i = 0; i < n; i++) {
2262 		struct sktu_frame *frame = frames[i];
2263 		struct ip *ip = (void *)&frame->bytes[0];
2264 		uint32_t af;
2265 		switch (ip->ip_v) {
2266 		case IPVERSION:
2267 			af = htonl(AF_INET);
2268 			break;
2269 		case IPV6_VERSION:
2270 			af = htonl(AF_INET6);
2271 			break;
2272 		default:
2273 			assert("unrecoginzed IP version");
2274 			__builtin_unreachable();
2275 			break;
2276 		}
2277 		struct {
2278 			uint32_t af;
2279 			char bytes[SKTU_FRAME_BUF_SIZE];
2280 		} utun_packet;
2281 		memcpy(&utun_packet.af, &af, sizeof(af));
2282 		memcpy(&utun_packet.bytes, &frame->bytes[0], frame->len);
2283 		ssize_t write_len = frame->len + sizeof(uint32_t);
2284 		T_LOG("%s writing frame len %zu\n", __func__, write_len);
2285 		ssize_t len = write(utun_fd, &utun_packet, write_len);
2286 		if (len != write_len) {
2287 			err(EX_OSERR, "utun write error\n");
2288 		}
2289 	}
2290 }
2291 
2292 struct sktu_frame *
sktu_frame_alloc()2293 sktu_frame_alloc()
2294 {
2295 	return malloc(sizeof(struct sktu_frame));
2296 }
2297 
/*
 * Free a frame and NULL the caller's pointer.  A macro (not a
 * function) so the NULL assignment affects the caller's lvalue.
 */
#define sktu_frame_free(frame) \
do { \
	free(frame); \
	frame = NULL; \
} while (0)

/* Free an array of n frames, NULLing each entry. */
void
sktu_frames_free(struct sktu_frame **frames, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		sktu_frame_free(frames[i]);
		/* redundant: the macro above already NULLed frames[i] */
		frames[i] = NULL;
	}
}
2312 
/*
 * Build one or more IPv4 frames around sdu, fragmenting at mtu.
 *
 * src_ip/dst_ip point to struct in_addr.  csum_flags/csum_start/
 * csum_stuff describe partial checksum-offload metadata relative to
 * the start of the SDU; offload is incompatible with fragmentation
 * (asserted below).
 *
 * Returns the number of frames written to frames[] (at most n).
 * NOTE(review): an sdu_len of 0 produces no frames at all — confirm
 * callers never need an empty datagram.
 */
size_t
sktu_create_ip_frames(struct sktu_frame **frames, size_t n,
    void *src_ip, void *dst_ip, uint8_t proto, const void *sdu, size_t sdu_len,
    size_t mtu, uint16_t csum_flags, uint16_t csum_start, uint16_t csum_stuff)
{
	size_t off = 0, remaining_sdu_len = sdu_len;
	size_t i = 0;
	uint16_t ip_id = sktu_ip_id();	/* shared by all fragments */
	bool needs_frag = false;

	while (remaining_sdu_len > 0) {
		assert(i < n);

		struct sktu_frame *frame = frames[i] = sktu_frame_alloc();
		char *baddr = &frame->bytes[0];
		struct ip *ip = (struct ip *)baddr;
		size_t dlen;
		bool more_frag = false;

		/* payload room left after the fixed 20-byte IP header */
		dlen = mtu - sizeof(*ip);
		if (dlen >= remaining_sdu_len) {
			dlen = remaining_sdu_len;
			needs_frag = false;
			more_frag = false;
		} else {
			/* IP fragment offsets are in 8-byte units */
			dlen = dlen & ~0x7; // round down to 8-byte multiple
			needs_frag = true;
			more_frag = true;
		}

		// can't handle fragmented csum offload
		assert(!(needs_frag && csum_flags != 0));

		memset(ip, 0, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(struct ip) >> 2;
		ip->ip_ttl = MAXTTL;
		ip->ip_p = proto;
		memcpy(&ip->ip_src, src_ip, sizeof(struct in_addr));
		memcpy(&ip->ip_dst, dst_ip, sizeof(struct in_addr));
		ip->ip_len = htons(sizeof(*ip) + dlen);
		ip->ip_id = htons(ip_id);
		ip->ip_off = ((off >> 3) & IP_OFFMASK);
		if (more_frag) {
			ip->ip_off |= IP_MF;
		}
		ip->ip_off = htons(ip->ip_off);

		/* compute the IP header checksum */
		ip->ip_sum = in_cksum(ip, sizeof(*ip), 0);
		baddr += sizeof(*ip);

		memcpy(baddr, sdu + off, dlen);

		/* offload offsets are SDU-relative; shift past the IP header */
		frame->csum_flags = csum_flags;
		frame->csum_start = sizeof(*ip) + csum_start;
		frame->csum_stuff = sizeof(*ip) + csum_stuff;

		frame->len = sizeof(*ip) + dlen;

		off += dlen;
		remaining_sdu_len -= dlen;
		i++;
	}

	return i;
}
2380 
/*
 * Build one or more IPv6 frames around sdu, fragmenting at mtu.
 *
 * src_ip/dst_ip point to struct in6_addr.  When fragmenting, every
 * frame carries an IPv6 fragment extension header; checksum offload is
 * incompatible with fragmentation (asserted below).
 *
 * Returns the number of frames written to frames[] (at most n).
 * NOTE(review): an sdu_len of 0 produces no frames at all.
 */
size_t
sktu_create_ip6_frames(struct sktu_frame **frames, size_t n,
    void *src_ip, void *dst_ip, uint8_t proto, const void *sdu, size_t sdu_len,
    size_t mtu, uint16_t csum_flags, uint16_t csum_start, uint16_t csum_stuff)
{
	size_t off = 0, remaining_sdu_len = sdu_len;
	size_t i = 0;
	uint16_t ip_id = sktu_ip_id();	/* fragment identification */
	bool needs_frag = false;

	while (remaining_sdu_len > 0) {
		assert(i < n);

		struct sktu_frame *frame = frames[i] = sktu_frame_alloc();
		char *baddr = &frame->bytes[0];
		struct ip6_hdr *ip6 = (struct ip6_hdr *)baddr;
		size_t hlen = sizeof(*ip6);
		size_t plen, dlen;	/* IPv6 payload length / SDU data length */
		bool more_frag = false;

		dlen = mtu - hlen;
		if (dlen >= remaining_sdu_len) {
			// fits in one packet
			dlen = plen = remaining_sdu_len;
			remaining_sdu_len = 0;
			more_frag = false;
		} else {
			// need to fragment
			dlen -= sizeof(struct ip6_frag);
			dlen = dlen & ~0x7; // round down to 8-byte multiple
			plen = sizeof(struct ip6_frag) + dlen;
			remaining_sdu_len -= dlen;
			needs_frag = true;
			more_frag = true;
		}

		// can't handle fragmented csum offload
		assert(!(needs_frag && csum_flags != 0));

		// insert ipv6 header
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc = (IPV6_VERSION & IPV6_VERSION_MASK);
		ip6->ip6_plen = htons(plen);
		/* fragments chain through the fragment extension header */
		ip6->ip6_nxt = needs_frag ? IPPROTO_FRAGMENT : proto;
		ip6->ip6_hlim = IPV6_DEFHLIM;
		memcpy(&ip6->ip6_src, src_ip, sizeof(struct in6_addr));
		memcpy(&ip6->ip6_dst, dst_ip, sizeof(struct in6_addr));

		baddr += sizeof(*ip6);

		// insert ipv6 frag header
		if (needs_frag) {
			struct ip6_frag *ip6f = (struct ip6_frag *)baddr;
			ip6f->ip6f_nxt = proto;
			ip6f->ip6f_reserved = 0;
			/* off is a multiple of 8, so the low 3 flag bits are clear */
			ip6f->ip6f_offlg = htons(off);
			if (more_frag) {
				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
			}
			ip6f->ip6f_ident = htonl(ip_id);

			hlen += sizeof(*ip6f);
			baddr += sizeof(*ip6f);
		}

		memcpy(baddr, sdu + off, dlen);

		/* offload offsets are SDU-relative; shift past the fixed header */
		frame->csum_flags = csum_flags;
		frame->csum_start = sizeof(*ip6) + csum_start;
		frame->csum_stuff = sizeof(*ip6) + csum_stuff;
		frame->len = hlen + dlen;

		off += dlen;
		i++;
	}

	return i;
}
2459 
2460 size_t
sktu_create_tcp_frames(struct sktu_frame ** frames,size_t n,uint8_t ipver,void * src_ip,void * dst_ip,uint16_t sport,uint16_t dport,const void * data,size_t data_len,size_t mtu,bool csum_offload)2461 sktu_create_tcp_frames(struct sktu_frame **frames, size_t n,
2462     uint8_t ipver, void *src_ip, void *dst_ip, uint16_t sport, uint16_t dport,
2463     const void *data, size_t data_len, size_t mtu, bool csum_offload)
2464 {
2465 	uint32_t n_frames;
2466 	size_t sdu_len = data_len + sizeof(struct tcphdr);
2467 	void *sdu = malloc(sdu_len);
2468 
2469 	// populate header
2470 	struct tcphdr *tcp = (struct tcphdr *)sdu;
2471 	tcp->th_sport = htons(sport);
2472 	tcp->th_dport = htons(dport);
2473 	tcp->th_flags |= 0; //FIXME (connect ? TH_SYN : TH_RST);
2474 	tcp->th_off = (sizeof(struct tcphdr)) >> 2;
2475 
2476 	// copy payload
2477 	memcpy(sdu + sizeof(*tcp), data, data_len);
2478 
2479 	// compute checksum
2480 	uint16_t sum = 0;
2481 
2482 	if (ipver == IPVERSION) {
2483 		sum = in_pseudo(*(uint32_t*)src_ip, *(uint32_t*)dst_ip,
2484 		    htons(data_len + sizeof(struct tcphdr) + IPPROTO_TCP));
2485 	} else {
2486 		sum = in6_pseudo(src_ip, dst_ip,
2487 		    htonl(data_len + sizeof(struct tcphdr) + IPPROTO_TCP));
2488 	}
2489 	tcp->th_sum = sum;
2490 
2491 	uint16_t csum_flags = 0, csum_start = 0, csum_stuff = 0;
2492 	if (csum_offload) {
2493 		csum_flags = PACKET_CSUM_PARTIAL;
2494 		csum_start = 0;
2495 		csum_stuff = offsetof(struct tcphdr, th_sum);
2496 	} else {
2497 		sum = os_inet_checksum(sdu, sdu_len, 0);
2498 		tcp->th_sum = sktu_fold_sum_final(sum);
2499 	}
2500 
2501 	// IP framing
2502 	if (ipver == IPVERSION) {
2503 		n_frames = sktu_create_ip_frames(frames, n, src_ip, dst_ip,
2504 		    IPPROTO_TCP, sdu, sdu_len, mtu, csum_flags, csum_start,
2505 		    csum_stuff);
2506 	} else {
2507 		n_frames = sktu_create_ip6_frames(frames, n, src_ip, dst_ip,
2508 		    IPPROTO_TCP, sdu, sdu_len, mtu, csum_flags, csum_start,
2509 		    csum_stuff);
2510 	}
2511 
2512 	free(sdu);
2513 
2514 	return n_frames;
2515 }
2516 
/*
 * Build one or more UDP frames (IPv4 or IPv6 per ipver) carrying data,
 * fragmenting at mtu.  sport/dport are in host order.
 *
 * When csum_offload is set, uh_sum is seeded with the pseudo-header
 * sum and PACKET_CSUM_PARTIAL|PACKET_CSUM_ZERO_INVERT metadata is
 * attached; otherwise the final checksum is computed in software.
 *
 * Returns the number of frames written to frames[].
 */
size_t
sktu_create_udp_frames(struct sktu_frame **frames, size_t n,
    uint8_t ipver, void *src_ip, void *dst_ip, uint16_t sport, uint16_t dport,
    const void *data, size_t data_len, size_t mtu, bool csum_offload)
{
	uint32_t n_frames;
	size_t sdu_len = data_len + sizeof(struct udphdr);
	void *sdu = malloc(sdu_len);

	// populate header
	struct udphdr *udp = (struct udphdr *)sdu;
	udp->uh_sport = htons(sport);
	udp->uh_dport = htons(dport);
	udp->uh_ulen = htons(sizeof(*udp) + data_len);

	// compute payload checksum
	/*
	 * NOTE(review): the checksum length below spans
	 * sizeof(udp_pseudo) + sizeof(struct udphdr) bytes starting at
	 * &udp_pseudo, which assumes the ipv{4,6}_udp_pseudo_hdr structs
	 * are declared with a trailing (zero-initialized) UDP header —
	 * verify against their definitions.
	 */
	uint32_t payload_sum = 0, pseudo_sum = 0;
	if (ipver == IPVERSION) {
		struct ipv4_udp_pseudo_hdr udp_pseudo = {};
		memcpy(&udp_pseudo.src_ip, src_ip, sizeof(struct in_addr));
		memcpy(&udp_pseudo.dst_ip, dst_ip, sizeof(struct in_addr));
		udp_pseudo.proto = IPPROTO_UDP;
		udp_pseudo.length = htons(sizeof(struct udphdr) + data_len);
		pseudo_sum = os_inet_checksum(&udp_pseudo, sizeof(udp_pseudo)
		    + sizeof(struct udphdr), 0);
	} else {
		struct ipv6_udp_pseudo_hdr udp_pseudo = {};
		memcpy(&udp_pseudo.src_ip, src_ip, sizeof(struct in6_addr));
		memcpy(&udp_pseudo.dst_ip, dst_ip, sizeof(struct in6_addr));
		udp_pseudo.proto = IPPROTO_UDP;
		udp_pseudo.length = htons(sizeof(struct udphdr) + data_len);
		pseudo_sum = os_inet_checksum(&udp_pseudo, sizeof(udp_pseudo)
		    + sizeof(struct udphdr), 0);
	}

	uint16_t csum_flags = 0, csum_start = 0, csum_stuff = 0;
	if (csum_offload) {
		csum_flags = PACKET_CSUM_PARTIAL | PACKET_CSUM_ZERO_INVERT;
		csum_start = 0;
		csum_stuff = offsetof(struct udphdr, uh_sum);
		udp->uh_sum = sktu_fold_sum_final(pseudo_sum);
	} else {
		/* software path: fold pseudo + payload and complement */
		payload_sum = os_inet_checksum(data, data_len, 0);
		udp->uh_sum = ~sktu_fold_sum_final(pseudo_sum + payload_sum);
	}

	// copy payload
	memcpy(sdu + sizeof(*udp), data, data_len);

	// IP framing
	if (ipver == IPVERSION) {
		n_frames = sktu_create_ip_frames(frames, n, src_ip, dst_ip,
		    IPPROTO_UDP, sdu, sdu_len, mtu, csum_flags, csum_start,
		    csum_stuff);
	} else {
		n_frames = sktu_create_ip6_frames(frames, n, src_ip, dst_ip,
		    IPPROTO_UDP, sdu, sdu_len, mtu, csum_flags, csum_start,
		    csum_stuff);
	}

	free(sdu);

	return n_frames;
}
2581 
/*
 * Tag each of the n_frames frames with the flow's UUID.
 */
void
sktu_attach_flow_metadata_to_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n_frames)
{
	/* size_t index: n_frames is size_t (the uint32_t index truncated) */
	for (size_t i = 0; i < n_frames; i++) {
		struct sktu_frame *frame = frames[i];
		uuid_copy(frame->flow_uuid, flow->uuid);
	}
}
2591 
/*
 * Build inbound (peer -> local) UDP frames for flow: source and
 * destination addresses/ports are swapped so the frames look like they
 * arrived from the remote end.  Checksums are computed in software.
 * Returns the number of frames created.
 */
static size_t
_sktu_create_udp_flow_input_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n, const void *data, size_t data_len)
{
	n = sktu_create_udp_frames(frames, n, flow->ipver, flow->dst_ip,
	    flow->src_ip, flow->dport, flow->sport, data, data_len, flow->mtu,
	    NO_CSUM_OFFLOAD);
	sktu_attach_flow_metadata_to_frames(flow, frames, n);
	return n;
}
2602 
/*
 * Build outbound (local -> peer) UDP frames for flow, optionally with
 * partial checksum offload metadata.  Returns the number of frames
 * created.
 */
static size_t
_sktu_create_udp_flow_output_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n, const void *data, size_t data_len,
    bool csum_offload)
{
	n = sktu_create_udp_frames(frames, n, flow->ipver, flow->src_ip,
	    flow->dst_ip, flow->sport, flow->dport, data, data_len, flow->mtu,
	    csum_offload);
	sktu_attach_flow_metadata_to_frames(flow, frames, n);
	return n;
}
2614 
/*
 * Build inbound (peer -> local) TCP frames for flow: addresses and
 * ports are swapped to mimic the remote sender.  Checksums are
 * computed in software.  Returns the number of frames created.
 */
static size_t
_sktu_create_tcp_flow_input_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n, const void *data, size_t data_len)
{
	n = sktu_create_tcp_frames(frames, n, flow->ipver, flow->dst_ip,
	    flow->src_ip, flow->dport, flow->sport, data, data_len, flow->mtu,
	    NO_CSUM_OFFLOAD);
	sktu_attach_flow_metadata_to_frames(flow, frames, n);
	return n;
}
2625 
/*
 * Build outbound (local -> peer) TCP frames for flow, optionally with
 * partial checksum offload metadata.  Returns the number of frames
 * created.
 */
static size_t
_sktu_create_tcp_flow_output_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n, const void *data, size_t data_len,
    bool csum_offload)
{
	n = sktu_create_tcp_frames(frames, n, flow->ipver, flow->src_ip,
	    flow->dst_ip, flow->sport, flow->dport, data, data_len, flow->mtu,
	    csum_offload);
	sktu_attach_flow_metadata_to_frames(flow, frames, n);
	return n;
}
2637 
/*
 * Build inbound raw-IP frames for flow (addresses swapped to mimic the
 * remote sender); no L4 checksum metadata.  Returns the number of
 * frames created.
 */
static size_t
_sktu_create_ip_flow_input_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n, const void *data, size_t data_len)
{
	n = sktu_create_ip_frames(frames, n, flow->dst_ip, flow->src_ip,
	    flow->ip_protocol, data, data_len, flow->mtu, 0, 0, 0);
	sktu_attach_flow_metadata_to_frames(flow, frames, n);
	return n;
}
2647 
/*
 * Build outbound raw-IP frames for flow.  csum_offload is accepted for
 * signature parity with the other create_output_frames callbacks but
 * is unused — raw IP carries no L4 checksum to offload.
 * Returns the number of frames created.
 */
static size_t
_sktu_create_ip_flow_output_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n, const void *data,
    size_t data_len, bool csum_offload)
{
	n = sktu_create_ip_frames(frames, n, flow->src_ip, flow->dst_ip,
	    flow->ip_protocol, data, data_len, flow->mtu, 0, 0, 0);
	sktu_attach_flow_metadata_to_frames(flow, frames, n);
	return n;
}
2658 
#define SKTU_STRING_BUF_MAX 2048
/*
 * Format a flow request as a human-readable string.
 *
 * Returns a pointer to a static buffer: not thread-safe, and every
 * call overwrites the previous result.
 */
char *
sktu_nfr_to_string(struct nx_flow_req *nfr)
{
	static char buf[SKTU_STRING_BUF_MAX];
	uuid_string_t uuidstr;
	/*
	 * INET6_ADDRSTRLEN (46): the original 31-byte buffers were too
	 * small for a full IPv6 address, so inet_ntop() could fail with
	 * ENOSPC and leave the buffers uninitialized for the %s below.
	 */
	char sa_buf[INET6_ADDRSTRLEN];
	char da_buf[INET6_ADDRSTRLEN];

	uuid_unparse(nfr->nfr_flow_uuid, uuidstr);
	if (nfr->nfr_saddr.sa.sa_family == AF_INET) {
		inet_ntop(AF_INET, &nfr->nfr_saddr.sin.sin_addr.s_addr, sa_buf,
		    sizeof(sa_buf));
		inet_ntop(AF_INET, &nfr->nfr_daddr.sin.sin_addr.s_addr, da_buf,
		    sizeof(da_buf));
	} else {
		inet_ntop(AF_INET6, &nfr->nfr_saddr.sin6.sin6_addr, sa_buf,
		    sizeof(sa_buf));
		inet_ntop(AF_INET6, &nfr->nfr_daddr.sin6.sin6_addr, da_buf,
		    sizeof(da_buf));
	}
	snprintf(buf, sizeof(buf),
	    "nx_port[%d] %s src=%s,dst=%s,proto=%d,sport=%d,dport=%d, flags=0x%x",
	    nfr->nfr_nx_port, uuidstr, sa_buf, da_buf, nfr->nfr_ip_protocol,
	    ntohs(nfr->nfr_saddr.sin.sin_port),
	    ntohs(nfr->nfr_daddr.sin.sin_port), nfr->nfr_flags);

	return buf;
}
2688 
/*
 * Format a flow as a human-readable string.  Returns the same static
 * buffer as sktu_nfr_to_string() — not thread-safe.
 */
char *
sktu_flow_to_string(struct sktu_flow *flow)
{
	return sktu_nfr_to_string(&flow->nfr);
}
2694 
/*
 * Create a flow on the nexus flowswitch and return a heap-allocated
 * sktu_flow describing it, or NULL if the kernel rejected the request.
 *
 * src/dst point to struct in_addr (AF_INET) or struct in6_addr
 * (AF_INET6); sport/dport are in host order; flags are NXFLOWREQF_*.
 * The returned flow's src_ip/dst_ip pointers alias flow->nfr, so they
 * remain valid exactly as long as the flow object itself.  Callers
 * destroy the flow with _sktu_destroy_nexus_flow().
 */
struct sktu_flow *
_sktu_create_nexus_flow(sktu_nexus_t nexus, nexus_port_t nx_port,
    uint8_t af, void *src, void *dst, uint8_t proto, uint16_t sport,
    uint16_t dport, uint32_t flags)
{
	/* NOTE(review): malloc result unchecked; test code assumes success */
	struct sktu_flow *flow = malloc(sizeof(*flow));

	memset(flow, 0, sizeof(*flow));
	flow->nexus = nexus;
	/* default MTU used by the frame-builder callbacks below */
	flow->mtu = 1500;

	flow->nx_port = nx_port;

	struct nx_flow_req *nfr = &flow->nfr;
	union sockaddr_in_4_6 *saddr = &nfr->nfr_saddr;
	union sockaddr_in_4_6 *daddr = &nfr->nfr_daddr;
	nfr->nfr_nx_port = nx_port;
	if (af == AF_INET) {
		// initialize flow
		flow->ipver = IPVERSION;
		// fill in nfr (stuff in network order :)
		SIN(saddr)->sin_len = sizeof(struct sockaddr_in);
		SIN(daddr)->sin_len = sizeof(struct sockaddr_in);
		SIN(saddr)->sin_family = AF_INET;
		SIN(daddr)->sin_family = AF_INET;
		SIN(saddr)->sin_addr = *(struct in_addr *)src;
		SIN(daddr)->sin_addr = *(struct in_addr *)dst;
		nfr->nfr_ip_protocol = proto;
		SIN(saddr)->sin_port = htons(sport);
		SIN(daddr)->sin_port = htons(dport);
	} else {
		flow->ipver = IPV6_VERSION;
		SIN6(saddr)->sin6_len = sizeof(struct sockaddr_in6);
		SIN6(daddr)->sin6_len = sizeof(struct sockaddr_in6);
		SIN6(saddr)->sin6_family = AF_INET6;
		SIN6(daddr)->sin6_family = AF_INET6;
		SIN6(saddr)->sin6_addr = *(struct in6_addr *)src;
		SIN6(daddr)->sin6_addr = *(struct in6_addr *)dst;
		nfr->nfr_ip_protocol = proto;
		SIN6(saddr)->sin6_port = htons(sport);
		SIN6(daddr)->sin6_port = htons(dport);
	}

	uuid_generate_random(nfr->nfr_flow_uuid);
	nfr->nfr_flags = flags;

	errno = 0;
	/* ask the kernel to instantiate the flow */
	int error = __os_nexus_flow_add(nexus->controller, nexus->fsw_nx_uuid, nfr);
	if (error) {
		T_LOG("Failed flow %s\n", sktu_nfr_to_string(nfr));
		free(flow);
		return NULL;
	}

	/* convenience aliases into nfr (see header comment re lifetime) */
	if (af == AF_INET) {
		flow->src_ip = &SIN(saddr)->sin_addr;
		flow->dst_ip = &SIN(daddr)->sin_addr;
		flow->sport = ntohs(SIN(saddr)->sin_port);
		flow->dport = ntohs(SIN(daddr)->sin_port);
	} else {
		flow->src_ip = &SIN6(saddr)->sin6_addr;
		flow->dst_ip = &SIN6(daddr)->sin6_addr;
		flow->sport = ntohs(SIN6(saddr)->sin6_port);
		flow->dport = ntohs(SIN6(daddr)->sin6_port);
	}

	flow->ip_protocol = proto;
	uuid_copy(flow->uuid, nfr->nfr_flow_uuid);

	/* per-protocol frame-builder callbacks */
	switch (proto) {
	case IPPROTO_UDP:
		flow->create_input_frames = _sktu_create_udp_flow_input_frames;
		flow->create_output_frames = _sktu_create_udp_flow_output_frames;
		break;
	case IPPROTO_TCP:
		flow->create_input_frames = _sktu_create_tcp_flow_input_frames;
		flow->create_output_frames = _sktu_create_tcp_flow_output_frames;
		break;
	default:
		flow->create_input_frames = _sktu_create_ip_flow_input_frames;
		flow->create_output_frames = _sktu_create_ip_flow_output_frames;
	}

	/* the kernel assigns a concrete port when NEXUS_PORT_ANY was passed */
	assert(nfr->nfr_nx_port != NEXUS_PORT_ANY);

	T_LOG("Created flow %s\n", sktu_nfr_to_string(nfr));

	return flow;
}
2784 
/*
 * Create a flow on any available nexus port with no special flags.
 * Returns the flow, or NULL on failure.
 */
struct sktu_flow *
sktu_create_nexus_flow(sktu_nexus_t nexus, uint8_t af, void *src, void *dst,
    uint8_t proto, uint16_t sport, uint16_t dport)
{
	return _sktu_create_nexus_flow(nexus, NEXUS_PORT_ANY, af, src, dst, proto, sport, dport, 0);
}
2791 
/*
 * Create a flow bound to a specific nexus port with no special flags.
 * Returns the flow, or NULL on failure.
 */
struct sktu_flow *
sktu_create_nexus_flow_with_nx_port(sktu_nexus_t nexus, nexus_port_t nx_port,
    uint8_t af, void *src, void *dst, uint8_t proto, uint16_t sport,
    uint16_t dport)
{
	return _sktu_create_nexus_flow(nexus, nx_port, af, src, dst, proto, sport, dport, 0);
}
2799 
/*
 * Create a low-latency flow (NXFLOWREQF_LOW_LATENCY) on any available
 * nexus port.  Returns the flow, or NULL on failure.
 */
struct sktu_flow *
sktu_create_nexus_low_latency_flow(sktu_nexus_t nexus, uint8_t af, void *src, void *dst,
    uint8_t proto, uint16_t sport, uint16_t dport)
{
	return _sktu_create_nexus_flow(nexus, NEXUS_PORT_ANY, af, src, dst, proto, sport, dport, NXFLOWREQF_LOW_LATENCY);
}
2806 
2807 void
_sktu_destroy_nexus_flow(struct sktu_flow * flow)2808 _sktu_destroy_nexus_flow(struct sktu_flow *flow)
2809 {
2810 	sktu_nexus_t nexus = flow->nexus;
2811 	struct nx_flow_req *nfr = &flow->nfr;
2812 
2813 	int error = __os_nexus_flow_del(nexus->controller, nexus->fsw_nx_uuid, nfr);
2814 	SKTC_ASSERT_ERR(!error);
2815 	if (error) {
2816 		T_LOG("failed to deling flow %s", sktu_nfr_to_string(nfr));
2817 	}
2818 
2819 	free(flow);
2820 }
2821 
/*
 * Look up the sk_stats_flow record for flow_uuid in a fresh sysctl
 * snapshot.  Copies the record into *sf and returns 0, or returns
 * ENOENT if no such flow exists.
 *
 * The snapshot buffer is freed on every path; the original leaked it
 * on each call.  (sysctl_buf() hands back a heap buffer — see
 * sktu_get_nexus_flowswitch_stats, which transfers the same buffer to
 * its caller to free.)
 */
int
sktu_get_nexus_flow_stats(uuid_t flow_uuid, struct sk_stats_flow *sf)
{
	size_t length = 0;
	void *buffer = NULL;
	bool found = false;

	int ret = sysctl_buf(SK_STATS_FLOW, &buffer, &length, NULL, 0);
	assert(ret == 0);
	assert(buffer != NULL && length != 0);

	assert((length % sizeof(*sf)) == 0);

	struct sk_stats_flow *iter;
	for (iter = buffer; (void *)iter < buffer + length; iter++) {
		if (uuid_compare(iter->sf_uuid, flow_uuid) == 0) {
			*sf = *iter;
			found = true;
			break;
		}
	}

	free(buffer);
	return found ? 0 : ENOENT;
}
2842 
/*
 * Fetch a snapshot of all flowswitch statistics via sysctl.
 *
 * On success, *sfsw receives a heap buffer the CALLER must free,
 * holding (*len / sizeof(struct sk_stats_flow_switch)) records.
 * A record-size mismatch aborts the process.
 * NOTE(review): if sysctl_buf() returns 0 with an empty buffer, this
 * returns 0 without touching *sfsw/*len — callers should
 * pre-initialize both.
 */
int
sktu_get_nexus_flowswitch_stats(struct sk_stats_flow_switch **sfsw, size_t *len)
{
	int ret;
	void *buffer = NULL;
	size_t length = 0;
	size_t width = sizeof(struct sk_stats_flow_switch);

	ret = sysctl_buf(SK_STATS_FLOW_SWITCH, &buffer, &length, NULL, 0);
	if (ret != 0 || buffer == NULL || length == 0) {
		return ret;
	}
	if ((length % width) != 0) {
		T_LOG("Error, mismatching sk_stats_flow_switch, quit\n");
		exit(EX_OSERR);
	}

	/* ownership of buffer transfers to the caller */
	*sfsw = (struct sk_stats_flow_switch *)buffer;
	*len = length;

	return 0;
}
2865 
2866 void
__fsw_stats_print(struct fsw_stats * s)2867 __fsw_stats_print(struct fsw_stats *s)
2868 {
2869 	int i;
2870 
2871 	for (i = 0; i < __FSW_STATS_MAX; i++) {
2872 		if (STATS_VAL(s, i) == 0) {
2873 			continue;
2874 		}
2875 		os_log(OS_LOG_DEFAULT, "\t%-24s: %llu\n",
2876 		    fsw_stats_str(i), STATS_VAL(s, i));
2877 	}
2878 }
2879