1 /*
2 * Copyright (c) 2016-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
/* This file contains useful utility routines; unlike skywalk_test_common,
 * these do not operate on a single set of static objects.
 */
32
33 /*
34 * Copyright (c) 1988, 1992, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
38 */
39
40
41 #include <err.h>
42 #include <assert.h>
43 #include <inttypes.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <poll.h>
51 #include <sys/event.h>
52 #include <uuid/uuid.h>
53 #include <arpa/inet.h>
54 #include <stddef.h>
55 #include <sysexits.h>
56 #include <sys/types.h>
57 #include <sys/sysctl.h>
58 #include <net/if_utun.h>
59 #include <net/if_ipsec.h>
60 #include <netinet/ip6.h>
61 #include <sys/kern_control.h>
62 #include <sys/ioctl.h>
63 #include <sys/socket.h>
64 #include <sys/kern_control.h>
65 #include <sys/sys_domain.h>
66 #include <ifaddrs.h>
67 #include <sys/fcntl.h>
68 #include <sys/kern_control.h>
69 #include <sys/sys_domain.h>
70 #include <net/if_utun.h>
71 #include <os/log.h>
72
73 #include <net/pfkeyv2.h>
74 #include <netinet6/ipsec.h>
75 #include <darwintest.h>
76
77 #include "skywalk_test_driver.h"
78 #include "skywalk_test_common.h" // XXX remove this
79 #include "skywalk_test_utils.h"
80
81 #define SIN(s) ((struct sockaddr_in *)(void *)s)
82 #define SIN6(s) ((struct sockaddr_in6 *)(void *)s)
83
/*
 * Register a nexus provider on controller `ncd' using the parameters in
 * `sktc_attr', and optionally allocate one instance of it.
 *
 * Any attribute field left at -1 is not programmed, so the kernel default
 * applies.  After registration the provider attributes are read back and
 * cross-checked against the requested values.  All failures are fatal via
 * assert()/SKTC_ASSERT_ERR().
 *
 * On return *providerp holds the provider UUID; when `instancep' is
 * non-NULL, *instancep holds a freshly allocated provider instance.
 */
void
sktc_build_nexus(nexus_controller_t ncd, struct sktc_nexus_attr *sktc_attr,
    uuid_t *providerp, uuid_t *instancep)
{
	nexus_attr_t attr;
	int error;
	uint64_t scratch;

	attr = os_nexus_attr_create();
	assert(attr);

	/* Program only the attributes the caller explicitly asked for. */
	if (sktc_attr->anonymous != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_ANONYMOUS,
		    sktc_attr->anonymous);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->userchannel != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_USER_CHANNEL,
		    sktc_attr->userchannel);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->ntxrings != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_RINGS,
		    sktc_attr->ntxrings);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->nrxrings != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_RINGS,
		    sktc_attr->nrxrings);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->ntxslots != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS,
		    sktc_attr->ntxslots);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->nrxslots != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS,
		    sktc_attr->nrxslots);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->slotsize != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE,
		    sktc_attr->slotsize);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->metasize != -1) {
		/*
		 * Setting SLOT_META_SIZE is expected to be rejected with
		 * ENOTSUP (deliberately asserted below, unlike the others).
		 */
		error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_META_SIZE,
		    sktc_attr->metasize);
		SKTC_ASSERT_ERR(error == ENOTSUP);
	}
	if (sktc_attr->maxfrags != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
		    sktc_attr->maxfrags);
		SKTC_ASSERT_ERR(!error);
	}
	if (sktc_attr->rejectonclose != -1) {
		error = os_nexus_attr_set(attr, NEXUS_ATTR_REJECT_ON_CLOSE,
		    sktc_attr->rejectonclose);
		SKTC_ASSERT_ERR(!error);
	}

	uuid_clear(*providerp);
	error = os_nexus_controller_register_provider(ncd,
	    sktc_attr->name, sktc_attr->type, attr, providerp);
	SKTC_ASSERT_ERR(!error);
	assert(!uuid_is_null(*providerp));

	/* Clear the parameters to make sure they are being read */
	/*
	 * NOTE(review): USER_CHANNEL is not reset here even though it is
	 * read back below -- presumably deliberate, but worth confirming.
	 */
	error = os_nexus_attr_set(attr, NEXUS_ATTR_ANONYMOUS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_RINGS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_RINGS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_META_SIZE, -1);
	SKTC_ASSERT_ERR(error == ENOTSUP);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS, -1);
	SKTC_ASSERT_ERR(!error);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_REJECT_ON_CLOSE, -1);
	SKTC_ASSERT_ERR(!error);

	error = os_nexus_controller_read_provider_attr(ncd,
	    *providerp, attr);
	SKTC_ASSERT_ERR(!error);

	/*
	 * Verify each attribute: the kernel must have filled in a real value
	 * (scratch != -1, i.e. != UINT64_MAX), and if the caller requested a
	 * specific value it must match.
	 */
	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_ANONYMOUS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->anonymous == -1 || sktc_attr->anonymous == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_USER_CHANNEL, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->userchannel == -1 ||
	    sktc_attr->userchannel == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_TX_RINGS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->ntxrings == -1 || sktc_attr->ntxrings == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_RX_RINGS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->nrxrings == -1 || sktc_attr->nrxrings == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_TX_SLOTS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->ntxslots == -1 || sktc_attr->ntxslots == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_RX_SLOTS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->nrxslots == -1 || sktc_attr->nrxslots == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_SLOT_BUF_SIZE, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->slotsize == -1 || sktc_attr->slotsize == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_SLOT_META_SIZE, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->metasize == -1 || sktc_attr->metasize == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_MAX_FRAGS, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->maxfrags == -1 || sktc_attr->maxfrags == scratch);

	scratch = -1;
	error = os_nexus_attr_get(attr, NEXUS_ATTR_REJECT_ON_CLOSE, &scratch);
	SKTC_ASSERT_ERR(!error);
	assert(scratch != -1);
	assert(sktc_attr->rejectonclose == -1 ||
	    sktc_attr->rejectonclose == scratch);

	os_nexus_attr_destroy(attr);

	if (instancep) {
		uuid_clear(*instancep);
		error = os_nexus_controller_alloc_provider_instance(ncd,
		    *providerp, instancep);
		SKTC_ASSERT_ERR(!error);
		assert(!uuid_is_null(*instancep));
	}
}
250
251 /* up to 4 seconds of retries (250ms delay per retry) */
252 #define SKTU_CHANNEL_CREATE_NOMEM_RETRIES 16
253
254 channel_t
sktu_channel_create_extended(const uuid_t uuid,const nexus_port_t port,const ring_dir_t dir,const ring_id_t rid,const channel_attr_t attr,uint64_t exclusive,uint64_t monitor,uint64_t txlowatunit,uint64_t txlowatval,uint64_t rxlowatunit,uint64_t rxlowatval,uint64_t userpacketpool,uint64_t defunctok,uint64_t event_ring,uint64_t low_latency)255 sktu_channel_create_extended(const uuid_t uuid,
256 const nexus_port_t port, const ring_dir_t dir,
257 const ring_id_t rid, const channel_attr_t attr,
258 uint64_t exclusive, uint64_t monitor,
259 uint64_t txlowatunit, uint64_t txlowatval,
260 uint64_t rxlowatunit, uint64_t rxlowatval,
261 uint64_t userpacketpool, uint64_t defunctok,
262 uint64_t event_ring, uint64_t low_latency)
263 {
264 channel_attr_t tmpattr;
265 int error;
266 uint64_t scratch;
267 static struct timespec delay250ms = { .tv_sec = 0, .tv_nsec = 250000000 };
268 uint32_t retries = 0;
269 channel_t ret = NULL;
270
271 if (!attr) {
272 tmpattr = os_channel_attr_create();
273 } else {
274 tmpattr = attr;
275 }
276
277 if (exclusive != -1) {
278 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_EXCLUSIVE, exclusive);
279 SKTC_ASSERT_ERR(!error);
280 }
281
282 if (monitor != -1) {
283 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_MONITOR, monitor);
284 SKTC_ASSERT_ERR(!error);
285 }
286
287 if (txlowatunit != -1) {
288 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_TX_LOWAT_UNIT, txlowatunit);
289 SKTC_ASSERT_ERR(!error);
290 }
291
292 if (txlowatval != -1) {
293 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_TX_LOWAT_VALUE, txlowatval);
294 SKTC_ASSERT_ERR(!error);
295 }
296
297 if (rxlowatunit != -1) {
298 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_RX_LOWAT_UNIT, rxlowatunit);
299 SKTC_ASSERT_ERR(!error);
300 }
301
302 if (rxlowatval != -1) {
303 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_RX_LOWAT_VALUE, rxlowatval);
304 SKTC_ASSERT_ERR(!error);
305 }
306
307 if (userpacketpool != -1) {
308 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_USER_PACKET_POOL, userpacketpool);
309 SKTC_ASSERT_ERR(!error);
310 }
311
312 if (defunctok != -1) {
313 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_NEXUS_DEFUNCT_OK, defunctok);
314 SKTC_ASSERT_ERR(!error);
315 }
316
317 if (event_ring != -1) {
318 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_EVENT_RING, event_ring);
319 SKTC_ASSERT_ERR(!error);
320 }
321
322 if (low_latency != -1) {
323 error = os_channel_attr_set(tmpattr, CHANNEL_ATTR_LOW_LATENCY, low_latency);
324 SKTC_ASSERT_ERR(!error);
325 }
326
327 retry:
328 ret = os_channel_create_extended(uuid, port, dir, rid, tmpattr);
329 if (ret == NULL) {
330 if (errno == ENOMEM && ++retries < SKTU_CHANNEL_CREATE_NOMEM_RETRIES) {
331 nanosleep(&delay250ms, NULL);
332 goto retry;
333 }
334 goto out;
335 }
336
337 scratch = -1;
338 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_EXCLUSIVE, &scratch);
339 SKTC_ASSERT_ERR(!error);
340 assert(scratch != 1);
341 assert(exclusive == -1 || exclusive == scratch);
342
343 scratch = -1;
344 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_MONITOR, &scratch);
345 SKTC_ASSERT_ERR(!error);
346 assert(scratch != -1);
347 assert(exclusive == -1 || monitor == scratch);
348
349 scratch = -1;
350 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_TX_LOWAT_UNIT, &scratch);
351 SKTC_ASSERT_ERR(!error);
352 assert(scratch != -1);
353 assert(exclusive == -1 || txlowatunit == scratch);
354
355 scratch = -1;
356 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_TX_LOWAT_VALUE, &scratch);
357 SKTC_ASSERT_ERR(!error);
358 assert(scratch != -1);
359 assert(exclusive == -1 || txlowatval == scratch);
360
361 scratch = -1;
362 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_RX_LOWAT_UNIT, &scratch);
363 SKTC_ASSERT_ERR(!error);
364 assert(scratch != -1);
365 assert(exclusive == -1 || rxlowatunit == scratch);
366
367 scratch = -1;
368 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_RX_LOWAT_VALUE, &scratch);
369 SKTC_ASSERT_ERR(!error);
370 assert(scratch != -1);
371 assert(exclusive == -1 || rxlowatval == scratch);
372
373 scratch = -1;
374 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_USER_PACKET_POOL, &scratch);
375 SKTC_ASSERT_ERR(!error);
376 assert(scratch != -1);
377 assert(exclusive == -1 || userpacketpool == scratch);
378
379 scratch = -1;
380 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_NEXUS_DEFUNCT_OK, &scratch);
381 SKTC_ASSERT_ERR(!error);
382 assert(scratch != -1);
383 assert(exclusive == -1 || defunctok == scratch);
384
385 scratch = -1;
386 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_EVENT_RING, &scratch);
387 SKTC_ASSERT_ERR(!error);
388 assert(scratch != -1);
389 assert(exclusive == -1 || event_ring == scratch);
390
391 scratch = -1;
392 error = os_channel_attr_get(tmpattr, CHANNEL_ATTR_LOW_LATENCY, &scratch);
393 SKTC_ASSERT_ERR(!error);
394 assert(scratch != -1);
395 assert(exclusive == -1 || low_latency == scratch);
396
397 out:
398 if (!attr) {
399 os_channel_attr_destroy(tmpattr);
400 }
401
402 return ret;
403 }
404
405 /****************************************************************/
406
/* Exchange permute[i] and permute[j]. */
static inline void
swap(int *permute, int i, int j)
{
	int held = permute[j];

	permute[j] = permute[i];
	permute[i] = held;
}
414
415
416 /* Plain changes, see Knuth (7.2.1.2) "Algorithm P"
417 * has advantage of only swapping adjacent pairs
418 * This could be cleaned up to be more "C" like, but
419 * this literal translation works without fanfare.
420 */
/*
 * Visit all n! permutations of permute[0..n-1], calling `func' once per
 * permutation.  c[j] tracks how far element j has migrated in its current
 * sweep; o[j] is that sweep's direction (+1 or -1), per Knuth's notation.
 * Uses VLAs -- fine for the small n these tests use.
 */
void
permutefuncP(int n, int *permute, void (*func)(int, int *permute))
{
	int j, s, q;
	int c[n], o[n];
	/* P1 Initialize. */
	for (j = 0; j < n; j++) {
		c[j] = 0;
		o[j] = 1;
	}
p2:
	/* P2 Visit. */
	func(n, permute);
	/* P3 Prepare for change. */
	j = n;
	s = 0;
p4:
	/* P4 Ready to change? */
	q = c[j - 1] + o[j - 1];
	if (q < 0) {
		goto p7;
	}
	if (q == j) {
		goto p6;
	}
	/* P5 Change: swap an adjacent pair (the algorithm's only move). */
	{
		//T_LOG("Swapping %d with %d\n", j-c[j-1]+s-1, j-q+s-1);
		swap(permute, j - c[j - 1] + s - 1, j - q + s - 1);
	}
	c[j - 1] = q;
	goto p2;
p6:	/* P6 Increase s; j == 1 means every element is exhausted: done. */
	if (j == 1) {
		return;
	}
	s++;
p7:	/* P7 Switch Direction */
	o[j - 1] = -o[j - 1];
	j--;
	goto p4;
}
463
464 /* Heap's algorithm */
/*
 * Visit all n! permutations of permute[0..n-1] via Heap's algorithm,
 * calling `func' once per permutation (including the initial ordering).
 * Logs progress at most once per second, with a crude linear ETA.
 */
void
permutefuncH(int n, int *permute, void (*func)(int, int *permute))
{
	time_t start = time(NULL);
	time_t now, then = start;
	int count = 0;
	int total = 1;		/* total = n!  (int overflows past n == 12) */
	int i = 0;
	int c[n];		/* per-level loop counters for Heap's iteration */
	memset(c, 0, sizeof(c));
	for (int f = 2; f <= n; f++) {
		total *= f;
	}
	/* visit the starting permutation first */
	count++;
	func(n, permute);
	while (i < n) {
		if (c[i] < i) {
			/* Heap's rule: swap position i with 0 (even i) or c[i] (odd i) */
			if (!(i & 1)) { /* Even */
				swap(permute, i, 0);
			} else { /* Odd */
				swap(permute, i, c[i]);
			}
			count++;
			{
				/* throttle progress output to one line per second */
				now = time(NULL);
				if (now > then) {
					T_LOG("time %ld on %d of %d (%2.2f%%, est %ld secs left)\n",
					    now - start, count, total,
					    (double)count * 100 / total,
					    (long)((double)(now - start) * total / count) - (now - start));
					then = now;
				}
			}
			func(n, permute);
			c[i] += 1;
			i = 0;
		} else {
			c[i] = 0;
			i++;
		}
	}
	now = time(NULL);
	/* NOTE(review): rate prints "inf" when the whole run takes < 1 second */
	T_LOG("total time %ld for %d permutations (rate %.2f)\n",
	    now - start, total, (double)total / (now - start));
}
510
511 /* Random permutations, knuth's shuffle */
512
/*
 * Visit `total' random permutations of permute[0..n-1] (Fisher-Yates /
 * Knuth shuffle, Algorithm P in TAOCP 3.4.2), seeded with `seed'.
 * `func' is called once per shuffled permutation; progress is logged at
 * most once per second.
 *
 * Fix: the index was drawn with "random() % i", which never picks j == i.
 * That is Sattolo's algorithm -- it generates only cyclic permutations,
 * not all n! of them.  A uniform shuffle draws j from [0, i] inclusive.
 */
void
permutefuncR(int n, int *permute, void (*func)(int, int *permute), int total, unsigned seed)
{
	time_t start = time(NULL);
	time_t now, then = start;
	int count = 0;
	T_LOG("Starting %d random permutations with seed %u\n", total, seed);
	srandom(seed);
	while (count < total) {
		for (int i = n - 1; i > 0; i--) {
			int j = random() % (i + 1); // XXX modulo bias.
			swap(permute, i, j);
		}
		count++;
		{
			/* throttle progress output to one line per second */
			now = time(NULL);
			if (now > then) {
				T_LOG("time %ld on %d of %d (%2.2f%%, est %ld secs left)\n",
				    now - start, count, total,
				    (double)count * 100 / total,
				    (long)((double)(now - start) * total / count) - (now - start));
				then = now;
			}
		}
		func(n, permute);
	}
	now = time(NULL);
	T_LOG("total time %ld for %d permutations (rate %.2f)\n",
	    now - start, total, (double)total / (now - start));
}
543
544
545 /*
546 * rakes each element across all other elements.
547 */
/*
 * "Rake" each element across all the others: after visiting the starting
 * ordering once, element i is dragged to the left edge one adjacent
 * exchange at a time (visiting after each), then -- from a fresh copy of
 * the starting ordering -- dragged to the right edge the same way.
 */
void
permutefuncZ(int n, int *permute, void (*func)(int, int *permute))
{
	int original[n];

	memcpy(original, permute, sizeof(original));
	func(n, permute);
	for (int i = 0; i < n; i++) {
		//T_LOG("raking %d left\n", i);
		memcpy(permute, original, sizeof(original));
		for (int k = i; k >= 1; k--) {
			int held = permute[k];
			permute[k] = permute[k - 1];
			permute[k - 1] = held;
			func(n, permute);
		}
		//T_LOG("raking %d right\n", i);
		memcpy(permute, original, sizeof(original));
		for (int k = i; k + 1 < n; k++) {
			int held = permute[k];
			permute[k] = permute[k + 1];
			permute[k + 1] = held;
			/* The first right is the same as the last left, so skip it */
			if (k != i) {
				func(n, permute);
			}
		}
	}
}
572
573 /****************************************************************/
574
/*
 * Build a flowswitch on top of the interface named in
 * handles->netif_ifname (which must already be set by the caller):
 *   1. create a nexus controller and a flowswitch nexus ("ms_fsw_<if>"),
 *   2. create and attach a netif nexus ("netif_<if>") unless the
 *      interface already has one,
 *   3. attach the flowswitch to the netif.
 * Any slot/buffer attribute passed as -1 uses the kernel default.
 *
 * NOTE(review): error paths return early and leave `handles' partially
 * initialized; callers appear to rely on the asserts/logs to catch this.
 */
void
sktc_create_flowswitch_no_address(struct sktc_nexus_handles *handles,
    uint64_t ntxslots, uint64_t nrxslots, uint64_t buf_size, uint64_t max_frags,
    uint64_t anonymous)
{
	char buf[256];
	int error;
	struct sktc_nexus_attr attr = SKTC_NEXUS_ATTR_INIT();

	attr.ntxslots = ntxslots;
	attr.nrxslots = nrxslots;
	attr.slotsize = buf_size;
	attr.anonymous = anonymous;
	attr.maxfrags = max_frags;

	if (handles->netif_ifname[0] == '\0') {
		T_LOG("%s: no interface name specified\n",
		    __func__);
		return;
	}
	if (strlen(handles->netif_ifname) >= IFNAMSIZ) {
		T_LOG("%s: invalid interface name specified %s\n",
		    __func__, handles->netif_ifname);
		return;
	}
	handles->controller = os_nexus_controller_create();
	if (handles->controller == NULL) {
		SKT_LOG(
			"%s: os_nexus_controller_create failed, %s (%d)\n",
			__func__, strerror(errno), errno);
		return;
	}

	/*
	 * strncpy with size-1 does not NUL-terminate on truncation; assumes
	 * attr.name was zero-initialized by SKTC_NEXUS_ATTR_INIT() so the
	 * final byte is already '\0' -- TODO confirm the macro guarantees it.
	 */
	snprintf(buf, sizeof(buf), "ms_fsw_%s", handles->netif_ifname);
	strncpy((char *)attr.name, buf, sizeof(nexus_name_t) - 1);
	attr.type = NEXUS_TYPE_FLOW_SWITCH;
	sktc_build_nexus(handles->controller, &attr, &handles->fsw_prov_uuid,
	    &handles->fsw_nx_uuid);

	/* if the netif is already present, don't bother creating/attaching */
	if (!sktc_get_netif_nexus(handles->netif_ifname,
	    handles->netif_nx_uuid)) {
		snprintf(buf, sizeof(buf), "netif_%s", handles->netif_ifname);
		strncpy((char *)attr.name, buf, sizeof(nexus_name_t) - 1);
		attr.type = NEXUS_TYPE_NET_IF;
		/* netif reuses `attr' but takes default slot counts */
		attr.ntxslots = -1;
		attr.nrxslots = -1;
		sktc_build_nexus(handles->controller, &attr,
		    &handles->netif_prov_uuid, &handles->netif_nx_uuid);
		error = __os_nexus_ifattach(handles->controller,
		    handles->netif_nx_uuid,
		    handles->netif_ifname, NULL,
		    false,
		    &handles->netif_nx_attach_uuid);
		if (error != 0) {
			SKT_LOG(
				"__os_nexus_ifattach(%s) failed, %s (%d)\n",
				buf, strerror(errno), errno);
			return;
		}
	}
	/* finally, stack the flowswitch on the netif nexus */
	error = __os_nexus_ifattach(handles->controller, handles->fsw_nx_uuid,
	    NULL, handles->netif_nx_uuid, false, &handles->fsw_nx_dev_attach_uuid);
	if (error != 0) {
		SKT_LOG("__os_nexus_ifattach() failed, %s (%d)\n",
		    strerror(errno), errno);
		return;
	}
}
644
645
646 void
sktc_nexus_handles_assign_address(struct sktc_nexus_handles * handles)647 sktc_nexus_handles_assign_address(struct sktc_nexus_handles *handles)
648 {
649 int error;
650
651 error = sktc_ifnet_add_addr(handles->netif_ifname,
652 &handles->netif_addr,
653 &handles->netif_mask, NULL);
654 SKTC_ASSERT_ERR(!error);
655 }
656
657 void
sktc_create_flowswitch(struct sktc_nexus_handles * handles,int i)658 sktc_create_flowswitch(struct sktc_nexus_handles *handles, int i)
659 {
660 uint16_t val;
661
662 /* assign the name */
663 snprintf(handles->netif_ifname, sizeof(handles->netif_ifname),
664 FETH_FORMAT, i);
665
666 /* pick/assign a random IPv4LL address */
667 val = random() % 0xffff;
668 /* avoid subnet broadcast and host address 0 */
669 if (((val & 0xff) == 0) || ((val & 0xff) == 0xff)) {
670 val = (val & 0xfff0) | 0x2;
671 }
672 handles->netif_addr = sktc_make_in_addr(IN_LINKLOCALNETNUM | val);
673 handles->netif_mask = sktc_make_in_addr(IN_CLASSC_NET);
674 sktc_nexus_handles_assign_address(handles);
675
676 /* create the flowswitch */
677 sktc_create_flowswitch_no_address(handles, -1, -1, -1, -1, 1);
678 }
679
/*
 * Undo sktc_create_flowswitch(): free the flowswitch instance, deregister
 * its provider, destroy the controller, and remove the interface address.
 * All failures are fatal.
 */
void
sktc_cleanup_flowswitch(struct sktc_nexus_handles *handles)
{
	int err;

	assert(handles->controller);
	assert(!uuid_is_null(handles->fsw_prov_uuid));
	assert(!uuid_is_null(handles->fsw_nx_uuid));

	err = os_nexus_controller_free_provider_instance(handles->controller,
	    handles->fsw_nx_uuid);
	SKTC_ASSERT_ERR(!err);

	err = os_nexus_controller_deregister_provider(handles->controller,
	    handles->fsw_prov_uuid);
	SKTC_ASSERT_ERR(!err);

	os_nexus_controller_destroy(handles->controller);

	err = sktc_ifnet_del_addr(handles->netif_ifname,
	    &handles->netif_addr);
	SKTC_ASSERT_ERR(!err);
}
702
703 /****************************************************************/
704
/*
 * Bind a TCP/IPv4 flow (wildcard local address) at nexus port `nx_port'
 * on flowswitch `fsw'.  `in_port' is the local TCP port in host order;
 * 0 requests an ephemeral port, which is then validated against the
 * net.inet.ip.portrange.{first,last} sysctls.  Returns the result of
 * __os_nexus_flow_add() (0 on success).
 */
int
sktc_bind_tcp4_flow(nexus_controller_t ncd, const uuid_t fsw, in_port_t in_port, nexus_port_t nx_port, const uuid_t flow)
{
	struct nx_flow_req nfr;
	int error;

	memset(&nfr, 0, sizeof(nfr));
	nfr.nfr_ip_protocol = IPPROTO_TCP;
	nfr.nfr_nx_port = nx_port;
	nfr.nfr_saddr.sa.sa_len = sizeof(struct sockaddr_in);
	nfr.nfr_saddr.sa.sa_family = AF_INET;
	nfr.nfr_saddr.sin.sin_port = htons(in_port);
	nfr.nfr_saddr.sin.sin_addr.s_addr = htonl(INADDR_ANY);
	uuid_copy(nfr.nfr_flow_uuid, flow);

#if 0
	char buf[31];
	uuid_string_t uuidstr;
	uuid_unparse(nfr.nfr_flow_uuid, uuidstr);
	inet_ntop(AF_INET, &nfr.nfr_saddr.sin.sin_addr.s_addr, buf, sizeof(buf));
	T_LOG("before: nx_port %3d Flow %s %s addr %s port %d\n",
	    nfr.nfr_nx_port, uuidstr, (nfr.nfr_ip_protocol == IPPROTO_TCP) ? "tcp" : "udp",
	    buf, ntohs(nfr.nfr_saddr.sin.sin_port));
#endif

	error = __os_nexus_flow_add(ncd, fsw, &nfr);
#if 0
	if (error) {
		T_LOG("__os_nexus_flow_add returned %d, errno %d\n", error, errno);
	}
#endif

#if 0
	uuid_unparse(nfr.nfr_flow_uuid, uuidstr);
	inet_ntop(AF_INET, &nfr.nfr_saddr.sin.sin_addr.s_addr, buf, sizeof(buf));
	T_LOG("after: nx_port %3d Flow %s %s addr %s port %d\n",
	    nfr.nfr_nx_port, uuidstr, (nfr.nfr_ip_protocol == IPPROTO_TCP) ? "tcp" : "udp",
	    buf, ntohs(nfr.nfr_saddr.sin.sin_port));
#endif

	// XXX fails, see the fswbind25 for standalone test for this
	assert(nfr.nfr_nx_port == nx_port);
	/* NOTE(review): logged even when a specific (non-ephemeral) port was requested */
	T_LOG("got ephemeral port %d\n", ntohs(nfr.nfr_saddr.sin.sin_port));

	/* Validate the ephemeral ports */
	if (!error && !in_port) {
		/* cache the sysctl port range on first use (per-process) */
		static int first, last;
		if (!first && !last) {
			size_t size;

			size = sizeof(first);
			error = sysctlbyname("net.inet.ip.portrange.first", &first, &size, NULL, 0);
			SKTC_ASSERT_ERR(!error);
			assert(size == sizeof(first));

			size = sizeof(last);
			error = sysctlbyname("net.inet.ip.portrange.last", &last, &size, NULL, 0);
			SKTC_ASSERT_ERR(!error);
			assert(size == sizeof(last));

			T_LOG("ephemeral port range first %d last %d\n", first, last);

			/* the sysctls may be configured in either order; normalize */
			if (last < first) {
				int tmp = first;
				first = last;
				last = tmp;
			}
			assert(first <= last);
		}
		assert(ntohs(nfr.nfr_saddr.sin.sin_port) >= first);
		assert(ntohs(nfr.nfr_saddr.sin.sin_port) <= last);
	}

	return error;
}
780
/*
 * Remove flow `flow' from flowswitch `fsw'.  Returns the result of
 * __os_nexus_flow_del() (0 on success); failures are logged, not fatal.
 *
 * Fix: the failure message previously named __os_nexus_flow_add (a
 * copy-paste from sktc_bind_tcp4_flow), misattributing the error.
 */
int
sktc_unbind_flow(nexus_controller_t ncd, const uuid_t fsw, const uuid_t flow)
{
	struct nx_flow_req nfr;
	int error;

	memset(&nfr, 0, sizeof(nfr));
	uuid_copy(nfr.nfr_flow_uuid, flow);

	error = __os_nexus_flow_del(ncd, fsw, &nfr);
	if (error) {
		SKT_LOG("__os_nexus_flow_del returned %d, errno %d\n", error, errno);
	}
	return error;
}
796
797 /****************************************************************/
798
799 uint32_t
sktc_chew_random(channel_t channel,channel_ring_t ring,sync_mode_t mode,bool dosync,uint32_t nslots)800 sktc_chew_random(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint32_t nslots)
801 {
802 uint64_t count = 0;
803 int error;
804 channel_slot_t slot;
805
806 /* Chew a random number of slots */
807 nslots = random() % (nslots + 1);
808
809 slot = NULL;
810 while (count < nslots) {
811 slot_prop_t prop;
812
813 slot = os_channel_get_next_slot(ring, slot, &prop);
814 assert(slot);
815 if (mode == CHANNEL_SYNC_TX) {
816 packet_t pkt = os_channel_slot_get_packet(ring, slot);
817 buflet_t buf = os_packet_get_next_buflet(pkt, NULL);
818 assert(buf != NULL);
819 uint16_t bdlim = os_buflet_get_data_limit(buf);
820 assert(bdlim != 0);
821 prop.sp_len = random() % bdlim;
822 os_channel_set_slot_properties(ring, slot, &prop);
823 }
824 count++;
825 }
826
827 if (slot) {
828 error = os_channel_advance_slot(ring, slot);
829 SKTC_ASSERT_ERR(!error);
830 }
831
832 if (dosync) {
833 error = os_channel_sync(channel, mode);
834 if (skywalk_in_driver && error) {
835 SKT_LOG("%s: sync fail error %d errno %d: %s\n", __func__, error, errno, strerror(errno));
836 } else {
837 SKTC_ASSERT_ERR(!error);
838 }
839 }
840
841 return count;
842 }
843
/* This pumps slots on a ring until `nslots' slots have been transferred,
 * blocking in kevent() whenever no slots are available.  Progress is
 * logged at most once per second when `verbose' is set.
 */
void
sktc_pump_ring_nslots_kq(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint64_t nslots, bool verbose)
{
	uint64_t count = 0;
	int channelfd;
	int kq;
	struct kevent kev;
	int error;
	time_t start, then;

	channelfd = os_channel_get_fd(channel);
	assert(channelfd != -1);

	/* register for writability (TX) or readability (RX) of the channel fd */
	kq = kqueue();
	assert(kq != -1);
	EV_SET(&kev, channelfd,
	    mode == CHANNEL_SYNC_TX ? EVFILT_WRITE : EVFILT_READ,
	    EV_ADD | EV_ENABLE, 0, 0, NULL);
	error = kevent(kq, &kev, 1, NULL, 0, NULL);
	SKTC_ASSERT_ERR(!error);

	if (verbose) {
		then = start = time(NULL);
	}

	while (count < nslots) {
		uint32_t avail;

		if (verbose) {
			/* throttle progress output to one line per second */
			time_t now = time(NULL);
			if (now > then) {
				T_LOG("time %ld pump %"PRId64" of %"PRId64" (%2.2f%%, est %ld secs left)\n",
				    now - start, count, nslots,
				    (double)count * 100 / nslots,
				    (long)((double)(now - start) * nslots / count) - (now - start));
				then = now;
			}
		}

		avail = os_channel_available_slot_count(ring);

		if (!avail) {
			/* no slots: block until the kernel signals the fd ready */
			int error;	/* NOTE(review): shadows the outer `error' */

			memset(&kev, 0, sizeof(kev));
			error = kevent(kq, NULL, 0, &kev, 1, NULL);
			SKTC_ASSERT_ERR(error != -1);
			SKTC_ASSERT_ERR(error == 1);

			/* the only registered event must be the one delivered */
			assert(kev.ident == channelfd);
			if (mode == CHANNEL_SYNC_TX) {
				assert(kev.filter == EVFILT_WRITE);
			} else {
				assert(kev.filter == EVFILT_READ);
			}

			avail = os_channel_available_slot_count(ring);
			assert(avail);
		}

		count += sktc_chew_random(channel, ring, mode, dosync, MIN(nslots - count, avail));
	}

	if (verbose) {
		time_t now = time(NULL);
		/* NOTE(review): rate prints "inf" when the run takes < 1 second */
		T_LOG("total time %ld for %"PRId64" slots (rate %.2f)\n",
		    now - start, nslots, (double)nslots / (now - start));
	}

	error = close(kq);
	SKTC_ASSERT_ERR(!error);
}
917
/*
 * Like sktc_pump_ring_nslots_kq(), but blocks in select() when the ring
 * has no available slots.  Transfers `nslots' slots total.
 */
void
sktc_pump_ring_nslots_select(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint64_t nslots, bool verbose)
{
	uint64_t count = 0;
	int channelfd;
	fd_set readfds, writefds, errorfds, zerofds;
	time_t start, then;

	channelfd = os_channel_get_fd(channel);
	assert(channelfd != -1);

	/* zerofds stays empty: it is the expected post-select state of the unused sets */
	FD_ZERO(&zerofds);
	FD_ZERO(&readfds);
	FD_ZERO(&writefds);
	FD_ZERO(&errorfds);
	if (mode == CHANNEL_SYNC_TX) {
		FD_SET(channelfd, &writefds);
	} else {
		FD_SET(channelfd, &readfds);
	}

	if (verbose) {
		then = start = time(NULL);
	}

	while (count < nslots) {
		uint32_t avail;

		if (verbose) {
			/* throttle progress output to one line per second */
			time_t now = time(NULL);
			if (now > then) {
				T_LOG("time %ld pump %"PRId64" of %"PRId64" (%2.2f%%, est %ld secs left)\n",
				    now - start, count, nslots,
				    (double)count * 100 / nslots,
				    (long)((double)(now - start) * nslots / count) - (now - start));
				then = now;
			}
		}

		avail = os_channel_available_slot_count(ring);

		if (!avail) {
			int error;

			/*
			 * select() mutates the sets in place; since we only
			 * return when channelfd is ready, its bit survives
			 * for the next iteration.  errorfds is re-armed each
			 * time and must come back empty.
			 */
			FD_SET(channelfd, &errorfds);
			error = select(channelfd + 1, &readfds, &writefds, &errorfds, NULL);
			SKTC_ASSERT_ERR(error != -1);
			assert(!memcmp(&zerofds, &errorfds, sizeof(zerofds)));
			if (mode == CHANNEL_SYNC_TX) {
				assert(FD_ISSET(channelfd, &writefds));
				assert(!memcmp(&zerofds, &readfds, sizeof(zerofds)));
			} else {
				assert(FD_ISSET(channelfd, &readfds));
				assert(!memcmp(&zerofds, &writefds, sizeof(zerofds)));
			}
			SKTC_ASSERT_ERR(error == 1);

			avail = os_channel_available_slot_count(ring);
			assert(avail);
		}

		count += sktc_chew_random(channel, ring, mode, dosync, MIN(nslots - count, avail));
	}

	if (verbose) {
		time_t now = time(NULL);
		/* NOTE(review): rate prints "inf" when the run takes < 1 second */
		T_LOG("total time %ld for %"PRId64" slots (rate %.2f)\n",
		    now - start, nslots, (double)nslots / (now - start));
	}
}
988
989 void
sktc_pump_ring_nslots_poll(channel_t channel,channel_ring_t ring,sync_mode_t mode,bool dosync,uint64_t nslots,bool verbose)990 sktc_pump_ring_nslots_poll(channel_t channel, channel_ring_t ring, sync_mode_t mode, bool dosync, uint64_t nslots, bool verbose)
991 {
992 uint64_t count = 0;
993 int channelfd;
994 struct pollfd fds;
995 time_t start, then;
996
997 channelfd = os_channel_get_fd(channel);
998 assert(channelfd != -1);
999
1000 fds.fd = channelfd;
1001 if (mode == CHANNEL_SYNC_TX) {
1002 fds.events = POLLWRNORM;
1003 } else {
1004 fds.events = POLLRDNORM;
1005 }
1006
1007 if (verbose) {
1008 then = start = time(NULL);
1009 }
1010
1011 while (count < nslots) {
1012 uint32_t avail;
1013
1014 if (verbose) {
1015 time_t now = time(NULL);
1016 if (now > then) {
1017 T_LOG("time %ld pump %"PRId64" of %"PRId64" (%2.2f%%, est %ld secs left)\n",
1018 now - start, count, nslots,
1019 (double)count * 100 / nslots,
1020 (long)((double)(now - start) * nslots / count) - (now - start));
1021 then = now;
1022 }
1023 }
1024
1025 avail = os_channel_available_slot_count(ring);
1026
1027 if (!avail) {
1028 int error;
1029
1030 error = poll(&fds, 1, -1);
1031 SKTC_ASSERT_ERR(error != -1);
1032 SKTC_ASSERT_ERR(error == 1);
1033 assert(fds.fd == channelfd);
1034 if (mode == CHANNEL_SYNC_TX) {
1035 assert(fds.events == POLLWRNORM);
1036 assert(fds.revents == POLLWRNORM);
1037 } else {
1038 assert(fds.events == POLLRDNORM);
1039 assert(fds.revents == POLLRDNORM);
1040 }
1041
1042 avail = os_channel_available_slot_count(ring);
1043 assert(avail);
1044 }
1045
1046 count += sktc_chew_random(channel, ring, mode, dosync, MIN(nslots - count, avail));
1047 }
1048
1049 if (verbose) {
1050 time_t now = time(NULL);
1051 T_LOG("total time %ld for %"PRId64" slots (rate %.2f)\n",
1052 now - start, nslots, (double)nslots / (now - start));
1053 }
1054 }
1055
1056 /****************************************************************/
1057
/*
 * Raise RLIMIT_NOFILE so this process may open at least `new` file
 * descriptors.  No-op when the current soft limit already suffices.
 */
void
sktc_raise_file_limit(int new)
{
	struct rlimit limits;
	int err;

	err = getrlimit(RLIMIT_NOFILE, &limits);
	SKTC_ASSERT_ERR(!err);

	if (limits.rlim_cur >= (rlim_t)new) {
		return;
	}

	T_LOG("raising file open limit from %llu (max %llu) to %d\n",
	    limits.rlim_cur, limits.rlim_max, new);
	limits.rlim_cur = new;
	limits.rlim_max = new;
	err = setrlimit(RLIMIT_NOFILE, &limits);
	SKTC_ASSERT_ERR(!err);
}
1076
1077
1078 /****************************************************************/
1079
1080 int
sktu_create_interface(sktu_if_type_t type,sktu_if_flag_t flags)1081 sktu_create_interface(sktu_if_type_t type, sktu_if_flag_t flags)
1082 {
1083 struct ctl_info kernctl_info;
1084 struct sockaddr_ctl kernctl_addr;
1085 int error;
1086 int tunsock;
1087 const char *CONTROL_NAME;
1088 int OPT_ENABLE_NETIF, OPT_ATTACH_FSW;
1089 int enable_netif, attach_fsw;
1090 int scratch;
1091
1092 assert(type == SKTU_IFT_UTUN || type == SKTU_IFT_IPSEC);
1093 if (type == SKTU_IFT_UTUN) {
1094 CONTROL_NAME = UTUN_CONTROL_NAME;
1095 OPT_ENABLE_NETIF = UTUN_OPT_ENABLE_NETIF;
1096 OPT_ATTACH_FSW = UTUN_OPT_ATTACH_FLOWSWITCH;
1097 } else {
1098 CONTROL_NAME = IPSEC_CONTROL_NAME;
1099 OPT_ENABLE_NETIF = IPSEC_OPT_ENABLE_NETIF;
1100 OPT_ATTACH_FSW = 0;
1101 }
1102
1103 enable_netif = ((flags & SKTU_IFF_ENABLE_NETIF) != 0) ? 1 : 0;
1104 attach_fsw = ((flags & SKTU_IFF_NO_ATTACH_FSW) != 0) ? 0 : 1;
1105
1106 /* XXX Remove this retry nonsense when this is fixed:
1107 * <rdar://problem/37340313> creating an interface without specifying specific interface name should not return EBUSY
1108 */
1109
1110 for (int i = 0; i < 10; i++) {
1111 if (i > 0) {
1112 T_LOG("%s: sleeping 1ms before retrying\n", __func__);
1113 usleep(1000);
1114 }
1115
1116 tunsock = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
1117 assert(tunsock != -1);
1118
1119 memset(&kernctl_info, 0, sizeof(kernctl_info));
1120 strlcpy(kernctl_info.ctl_name, CONTROL_NAME, sizeof(kernctl_info.ctl_name));
1121 error = ioctl(tunsock, CTLIOCGINFO, &kernctl_info);
1122 SKTC_ASSERT_ERR(error == 0);
1123
1124 memset(&kernctl_addr, 0, sizeof(kernctl_addr));
1125 kernctl_addr.sc_len = sizeof(kernctl_addr);
1126 kernctl_addr.sc_family = AF_SYSTEM;
1127 kernctl_addr.ss_sysaddr = AF_SYS_CONTROL;
1128 kernctl_addr.sc_id = kernctl_info.ctl_id;
1129 kernctl_addr.sc_unit = 0;
1130
1131 /* If this is being called to reinstantiate a device that was just detached,
1132 * then this may return busy while the asynchronous detach completes.
1133 * This only occurs when this is being called in a tight loop
1134 * as per the utun27646755 test below
1135 */
1136
1137 error = bind(tunsock, (struct sockaddr *)&kernctl_addr, sizeof(kernctl_addr));
1138 if (error == -1 && errno == EBUSY) {
1139 close(tunsock);
1140 tunsock = -1;
1141 T_LOG("%s: i = %d bind returned EBUSY\n", __func__, i);
1142 continue;
1143 }
1144
1145 /* can only be set before connecting */
1146 error = setsockopt(tunsock, SYSPROTO_CONTROL, OPT_ENABLE_NETIF, &enable_netif, sizeof(enable_netif));
1147 SKTC_ASSERT_ERR(!error);
1148 socklen_t scratchlen = sizeof(scratch);
1149 error = getsockopt(tunsock, SYSPROTO_CONTROL, OPT_ENABLE_NETIF, &scratch, &scratchlen);
1150 SKTC_ASSERT_ERR(!error);
1151 assert(scratchlen == sizeof(scratch));
1152 assert(enable_netif == scratch);
1153
1154 /* only applicable for utun */
1155 if (type == SKTU_IFT_UTUN) {
1156 error = setsockopt(tunsock, SYSPROTO_CONTROL, OPT_ATTACH_FSW, &attach_fsw, sizeof(attach_fsw));
1157 SKTC_ASSERT_ERR(!error);
1158 }
1159
1160 error = connect(tunsock, (struct sockaddr *)&kernctl_addr, sizeof(kernctl_addr));
1161 if (error == -1 && errno == EBUSY) {
1162 T_LOG("%s: i = %d connect returned EBUSY\n", __func__, i);
1163 close(tunsock);
1164 tunsock = -1;
1165 continue;
1166 }
1167
1168 error = fcntl(tunsock, F_SETFD, FD_CLOEXEC);
1169 if (error != 0) {
1170 warn("FD_CLOEXEC");
1171 }
1172
1173 break;
1174 }
1175
1176 if (error == -1) {
1177 warn("Failed to create utun errno %d", errno);
1178 close(tunsock);
1179 tunsock = -1;
1180 }
1181
1182 return tunsock;
1183 }
1184
/*
 * Enable the kernel-pipe channel on a utun/ipsec control socket and
 * open a channel to it.  Returns the opened channel; asserts on failure.
 */
channel_t
sktu_create_interface_channel(sktu_if_type_t type, int tunsock)
{
	uuid_t ch_uuid;
	channel_t ch;
	socklen_t optlen;
	int err;
	int opt_enable, opt_get_uuid;

	if (type == SKTU_IFT_UTUN) {
		opt_enable = UTUN_OPT_ENABLE_CHANNEL;
		opt_get_uuid = UTUN_OPT_GET_CHANNEL_UUID;
	} else {
		assert(type == SKTU_IFT_IPSEC);
		opt_enable = IPSEC_OPT_ENABLE_CHANNEL;
		opt_get_uuid = IPSEC_OPT_GET_CHANNEL_UUID;
	}

	/* utun needs the channel explicitly enabled first */
	if (type == SKTU_IFT_UTUN) {
		int on = 1;
		err = setsockopt(tunsock, SYSPROTO_CONTROL, opt_enable, &on, sizeof(on));
		if (err != 0) {
			SKT_LOG("setsockopt returned error %d, errno %d\n", err, errno);
		}
		SKTC_ASSERT_ERR(err == 0);
	}

	/* the channel must now report itself as enabled */
	int value;
	optlen = sizeof(value);
	err = getsockopt(tunsock, SYSPROTO_CONTROL, opt_enable, &value, &optlen);
	SKTC_ASSERT_ERR(!err);
	assert(optlen == sizeof(value));
	assert(1 == value);

	/* fetch the nexus instance UUID backing the channel */
	optlen = sizeof(ch_uuid);
	err = getsockopt(tunsock, SYSPROTO_CONTROL, opt_get_uuid, ch_uuid, &optlen);
	SKTC_ASSERT_ERR(err == 0);
	assert(optlen == sizeof(ch_uuid));

	ch = sktu_channel_create_extended(ch_uuid,
	    NEXUS_PORT_KERNEL_PIPE_CLIENT,
	    CHANNEL_DIR_TX_RX, CHANNEL_RING_ID_ANY, NULL,
	    -1, -1, -1, -1, -1, -1, -1, 1, -1, -1);
	assert(ch);

	return ch;
}
1235
1236 void
sktu_get_interface_name(sktu_if_type_t type,int s,char name[IFNAMSIZ])1237 sktu_get_interface_name(sktu_if_type_t type, int s, char name[IFNAMSIZ])
1238 {
1239 int error;
1240 socklen_t optlen = IFNAMSIZ;
1241 if (type == SKTU_IFT_UTUN) {
1242 error = getsockopt(s, SYSPROTO_CONTROL, UTUN_OPT_IFNAME, name, &optlen);
1243 } else {
1244 error = getsockopt(s, SYSPROTO_CONTROL, IPSEC_OPT_IFNAME, name, &optlen);
1245 }
1246 SKTC_ASSERT_ERR(!error);
1247 }
1248
/*
 * Write a canonical hex + ASCII dump of `buf` (`len` bytes) to `f`,
 * 16 bytes per line, prefixed with the optional description `desc`.
 * Non-printable bytes render as '.' in the ASCII column.
 */
void
sktu_dump_buffer(FILE *f, const char *desc, const void *buf, size_t len)
{
	size_t i;
	unsigned char ascii[17]; /* one 16-byte line + NUL */
	const unsigned char *pc = (const unsigned char *)buf;

	if (desc != NULL) {
		fprintf(f, "%s:\n", desc);
	}

	if (len == 0) {
		fprintf(f, " ZERO LENGTH\n");
		return;
	}

	/* fix: size_t index — the old `int i` mixed signed/unsigned in the
	 * loop bound and could overflow for very large buffers */
	for (i = 0; i < len; i++) {
		if ((i % 16) == 0) {
			if (i != 0) {
				fprintf(f, " %s\n", ascii);
			}

			fprintf(f, " %04zx ", i); // offset
		}

		fprintf(f, " %02x", pc[i]);

		// prepare ascii; non-printable bytes become '.'
		if ((pc[i] < 0x20) || (pc[i] > 0x7e)) {
			ascii[i % 16] = '.';
		} else {
			ascii[i % 16] = pc[i];
		}
		ascii[(i % 16) + 1] = '\0';
	}

	// pad last line so the ascii column lines up
	while ((i % 16) != 0) {
		fprintf(f, " ");
		i++;
	}

	fprintf(f, " %s\n", ascii);
}
1293
/*
 * Fetch a sysctl by name into a freshly malloc'd buffer.
 *
 * oid_name:    sysctl MIB name.
 * buffer:      out: malloc'd result (caller frees); NULL if empty/error.
 * len:         out: size of the result in bytes.
 * newp/newlen: optional new value to set, as per sysctlbyname(3).
 *
 * Returns 0 on success, an errno value otherwise.  Retries up to
 * RETRY_COUNT times when the value grows between the size probe and the
 * actual fetch (ENOMEM).
 */
int
sysctl_buf(char *oid_name, void **buffer, size_t *len, void *newp,
    size_t newlen)
{
	int ret, err;
	int try = 0;

	*buffer = NULL;
#define RETRY_COUNT 10
try_again:
	/* probe for the required length */
	ret = sysctlbyname(oid_name, NULL, len, newp, newlen);
	if (ret != 0) {
		/*
		 * fix: sysctlbyname(3) returns -1 and sets errno; the old
		 * code compared the -1 return against ENOMEM, so this retry
		 * path could never trigger.
		 */
		err = errno;
		if (err == ENOMEM && ++try <= RETRY_COUNT) {
			goto try_again;
		}
		SKT_LOG("sysctl for len failed, %s\n", strerror(err));
		return err;
	}
	if (*len == 0) {
		T_LOG("sysctl for len returned zero! No stats?\n");
		*buffer = NULL;
		return 0;
	}
	*buffer = malloc(*len);
	if (*buffer == NULL) {
		T_LOG("sysctl malloc for %zu bytes failed\n", *len);
		return ENOMEM;
	}

	ret = sysctlbyname(oid_name, *buffer, len, newp, newlen);
	if (ret != 0) {
		err = errno;
		free(*buffer);
		*buffer = NULL; /* never hand back a dangling pointer */
		if (err == ENOMEM && ++try <= RETRY_COUNT) {
			/* value grew since the size probe; go around again */
			goto try_again;
		}
		SKT_LOG("sysctl for buf failed, %s\n", strerror(err));
		return err;
	}

	return 0;
}
1345
/*
 * Read, and optionally set, the skywalk error-injection rmask sysctl.
 * Pass mask == NULL to only read.  Returns the previous mask value.
 */
uint32_t
sktu_set_inject_error_rmask(uint32_t *mask)
{
	uint32_t previous;
	size_t prev_size = sizeof(previous);
	size_t new_size = (mask != NULL) ? sizeof(*mask) : 0;
	int err;

	err = sysctlbyname("kern.skywalk.inject_error_rmask",
	    &previous, &prev_size, mask, new_size);
	SKTC_ASSERT_ERR(!err);
	return previous;
}
1359
1360 /* returns TRUE if a matching IPv4 address is found */
1361 boolean_t
sktu_check_interface_ipv4_address(char * ifname,uint32_t ipaddr)1362 sktu_check_interface_ipv4_address(char *ifname, uint32_t ipaddr)
1363 {
1364 struct ifaddrs *ifaddr, *ifa;
1365 boolean_t match = FALSE;
1366 int error;
1367
1368 error = getifaddrs(&ifaddr);
1369 SKTC_ASSERT_ERR(!error);
1370
1371 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
1372 struct sockaddr_in *sin =
1373 (struct sockaddr_in *)(void *)ifa->ifa_addr;
1374 if (ifa->ifa_addr == NULL) {
1375 continue;
1376 }
1377 if ((strncmp(ifa->ifa_name, ifname, IFNAMSIZ) == 0) &&
1378 (ifa->ifa_addr->sa_family == AF_INET) &&
1379 (sin->sin_addr.s_addr == ipaddr)) {
1380 match = TRUE;
1381 }
1382 }
1383 freeifaddrs(ifaddr);
1384 return match;
1385 }
1386
1387 /****************************************************************/
1388
1389 int
sktu_create_pfkeysock(void)1390 sktu_create_pfkeysock(void)
1391 {
1392 int keysock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
1393 assert(keysock != -1);
1394 return keysock;
1395 }
1396
/*
 * Install a minimal ESP Security Association (NULL encryption, no
 * authentication) for traffic between *src and *dst, bound to the ipsec
 * interface `ifname`, by sending a single SADB_ADD message on the
 * PF_KEY socket `keysock`.  Asserts that the whole message is written;
 * kernel-side errors arrive asynchronously on the PF_KEY socket and are
 * not checked here.
 */
void
sktu_create_sa(int keysock, const char ifname[IFXNAMSIZ], uint32_t spi, struct in_addr *src, struct in_addr *dst)
{
	/*
	 * <base, SA, (lifetime(HS),) address(SD), (address(P),)
	 * key(AE), (identity(SD),) (sensitivity)>
	 */

	/* The entire PF_KEY message is built as one contiguous struct;
	 * each extension must be 64-bit aligned, matching the
	 * PFKEY_UNIT64() length encoding used below.  The field order IS
	 * the wire format — do not reorder. */
	struct {
		struct sadb_msg msg __attribute((aligned(sizeof(uint64_t))));
		struct sadb_key key __attribute((aligned(sizeof(uint64_t))));
		struct sadb_sa sa __attribute((aligned(sizeof(uint64_t))));
		struct sadb_x_sa2 sa2 __attribute((aligned(sizeof(uint64_t))));
		struct sadb_x_ipsecif ipsecif __attribute((aligned(sizeof(uint64_t))));
		struct {
			struct sadb_address addr __attribute((aligned(sizeof(uint64_t))));
			struct sockaddr_in saddr __attribute((aligned(sizeof(uint64_t))));
		} src;
		struct {
			struct sadb_address addr __attribute((aligned(sizeof(uint64_t))));
			struct sockaddr_in saddr __attribute((aligned(sizeof(uint64_t))));
		} dst;
	} addcmd;

	memset(&addcmd, 0, sizeof(addcmd));

	/* base header: SADB_ADD for an ESP SA; length in 64-bit units */
	addcmd.msg.sadb_msg_version = PF_KEY_V2;
	addcmd.msg.sadb_msg_type = SADB_ADD;
	addcmd.msg.sadb_msg_errno = 0;
	addcmd.msg.sadb_msg_satype = SADB_SATYPE_ESP;
	addcmd.msg.sadb_msg_len = PFKEY_UNIT64(sizeof(addcmd));
	addcmd.msg.sadb_msg_reserved = 0;
	addcmd.msg.sadb_msg_seq = 0;
	addcmd.msg.sadb_msg_pid = (unsigned)getpid();

	/* zero-length encryption key: matches SADB_EALG_NULL below */
	addcmd.key.sadb_key_len = PFKEY_UNIT64(sizeof(addcmd.key));
	addcmd.key.sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
	addcmd.key.sadb_key_bits = 0;
	addcmd.key.sadb_key_reserved = 0;

	/* the SA itself: caller-chosen SPI, no auth, NULL encryption */
	addcmd.sa.sadb_sa_len = PFKEY_UNIT64(sizeof(addcmd.sa));
	addcmd.sa.sadb_sa_exttype = SADB_EXT_SA;
	addcmd.sa.sadb_sa_spi = htonl(spi);
	addcmd.sa.sadb_sa_replay = 0;
	addcmd.sa.sadb_sa_state = 0;
	addcmd.sa.sadb_sa_auth = SADB_AALG_NONE;
	addcmd.sa.sadb_sa_encrypt = SADB_EALG_NULL;
	addcmd.sa.sadb_sa_flags = 0;

	/* transport mode; SA is torn down when the interface detaches */
	addcmd.sa2.sadb_x_sa2_len = PFKEY_UNIT64(sizeof(addcmd.sa2));
	addcmd.sa2.sadb_x_sa2_exttype = SADB_X_EXT_SA2;
	addcmd.sa2.sadb_x_sa2_mode = IPSEC_MODE_TRANSPORT;
	addcmd.sa2.sadb_x_sa2_alwaysexpire = 1;
	addcmd.sa2.sadb_x_sa2_flags = SADB_X_EXT_SA2_DELETE_ON_DETACH;
	addcmd.sa2.sadb_x_sa2_sequence = 0;
	addcmd.sa2.sadb_x_sa2_reqid = 0;

	/* bind the SA to the given ipsec interface by name */
	addcmd.ipsecif.sadb_x_ipsecif_len = PFKEY_UNIT64(sizeof(addcmd.ipsecif));
	addcmd.ipsecif.sadb_x_ipsecif_exttype = SADB_X_EXT_IPSECIF;
	memset(addcmd.ipsecif.sadb_x_ipsecif_internal_if, 0, sizeof(addcmd.ipsecif.sadb_x_ipsecif_internal_if));
	memset(addcmd.ipsecif.sadb_x_ipsecif_outgoing_if, 0, sizeof(addcmd.ipsecif.sadb_x_ipsecif_outgoing_if));
	strlcpy(addcmd.ipsecif.sadb_x_ipsecif_ipsec_if, ifname, sizeof(addcmd.ipsecif.sadb_x_ipsecif_ipsec_if));
	addcmd.ipsecif.sadb_x_ipsecif_init_disabled = 0;
	addcmd.ipsecif.reserved = 0;

	/* source endpoint; prefix length is the full address in bits */
	addcmd.src.addr.sadb_address_len = PFKEY_UNIT64(sizeof(addcmd.src));
	addcmd.src.addr.sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
	addcmd.src.addr.sadb_address_proto = IPSEC_ULPROTO_ANY;
	addcmd.src.addr.sadb_address_prefixlen = sizeof(struct in_addr) << 3; //XXX Why?
	addcmd.src.addr.sadb_address_reserved = 0;
	addcmd.src.saddr.sin_len = sizeof(addcmd.src.saddr);
	addcmd.src.saddr.sin_family = AF_INET;
	addcmd.src.saddr.sin_port = htons(0);
	addcmd.src.saddr.sin_addr = *src;

	/* destination endpoint, mirroring the source layout */
	addcmd.dst.addr.sadb_address_len = PFKEY_UNIT64(sizeof(addcmd.dst));
	addcmd.dst.addr.sadb_address_exttype = SADB_EXT_ADDRESS_DST;
	addcmd.dst.addr.sadb_address_proto = IPSEC_ULPROTO_ANY;
	addcmd.dst.addr.sadb_address_prefixlen = sizeof(struct in_addr) << 3; //XXX Why?
	addcmd.dst.addr.sadb_address_reserved = 0;
	addcmd.dst.saddr.sin_len = sizeof(addcmd.dst.saddr);
	addcmd.dst.saddr.sin_family = AF_INET;
	addcmd.dst.saddr.sin_port = htons(0);
	addcmd.dst.saddr.sin_addr = *dst;

	//log_hexdump(&addcmd, sizeof(addcmd));

	ssize_t slen;
	slen = send(keysock, &addcmd, sizeof(addcmd), 0);
	assert(slen == sizeof(addcmd));
}
1488
/* View a 16-bit quantity as raw bytes; used below by in_cksum() to pick
 * up the trailing byte of an odd-length packet. */
typedef union {
	char c[2];
	u_short s;
} short_union_t;

/* View a `long` accumulator as two 16-bit halves so carries can be
 * folded back into the low 16 bits (one's-complement reduction).
 * NOTE(review): assumes little-endian layout with the useful sum in the
 * low 32 bits of `long` — true on all Darwin targets. */
typedef union {
	u_short s[2];
	long l;
} long_union_t;
1498
/*
 * Fold the 32-bit one's-complement accumulator `*sum` to 16 bits by
 * adding the high half into the low half, with end-around carry.
 *
 * fix: rewritten with explicit shifts — the previous long_union_t
 * punning relied on little-endian layout and a particular `long` width;
 * this form computes the identical value on Darwin and is portable.
 */
static __inline__ void
reduce(int * sum)
{
	unsigned int acc = (unsigned int)*sum;

	*sum = (int)((acc & 0xffff) + ((acc >> 16) & 0xffff));
	if (*sum > 65535) {
		*sum -= 65535;
	}
	return;
}
1511
/*
 * Compute the 16-bit one's-complement Internet checksum over `pkt`
 * (`len` bytes), starting from the partial sum `sum0`.  Returns the
 * folded, complemented 16-bit result.
 *
 * Derived from the classic BSD in_cksum(); the loops are manually
 * unrolled 16 and 4 words at a time for speed.  `pkt` is read as
 * 16-bit words — assumes 2-byte alignment of the buffer.
 */
unsigned short
in_cksum(void * pkt, int len, int sum0)
{
	u_short * w;
	int sum = sum0;

	w = (u_short *)pkt;
	/* 32 bytes (16 words) per iteration */
	while ((len -= 32) >= 0) {
		sum += w[0]; sum += w[1];
		sum += w[2]; sum += w[3];
		sum += w[4]; sum += w[5];
		sum += w[6]; sum += w[7];
		sum += w[8]; sum += w[9];
		sum += w[10]; sum += w[11];
		sum += w[12]; sum += w[13];
		sum += w[14]; sum += w[15];
		w += 16;
	}
	len += 32;	/* undo the final over-subtraction */
	/* 8 bytes (4 words) per iteration */
	while ((len -= 8) >= 0) {
		sum += w[0]; sum += w[1];
		sum += w[2]; sum += w[3];
		w += 4;
	}
	len += 8;
	/* remaining words, folding first so the 32-bit sum cannot
	 * overflow while finishing up */
	if (len) {
		reduce(&sum);
		while ((len -= 2) >= 0) {
			sum += *w++;
		}
	}
	if (len == -1) { /* odd-length packet */
		short_union_t s_util;

		/* pad the trailing byte with a zero byte */
		s_util.s = 0;
		s_util.c[0] = *((char *)w);
		s_util.c[1] = 0;
		sum += s_util.s;
	}
	reduce(&sum);
	return ~sum & 0xffff;
}
1554
/* Fold any carries out of the high 16 bits of _x back into the low
 * 16 bits (end-around carry of one's-complement addition). */
#define ADDCARRY(_x) do { \
	while (((_x) >> 16) != 0) \
	        (_x) = ((_x) >> 16) + ((_x) & 0xffff); \
} while (0)

/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */
/* Reduce the 64-bit accumulator `sum` to a folded 16-bit value via the
 * q_util/l_util unions declared below.
 * NOTE(review): assumes little-endian union layout (true on Darwin). */
#define REDUCE16 { \
	q_util.q = sum; \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1]; \
	ADDCARRY(sum); \
}

/* a 32-bit value as two 16-bit halves */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* a 64-bit value as four 16-bit words or two 32-bit halves */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};
1583
/*
 * 16-bit one's-complement sum of the IPv4 pseudo-header words `a`, `b`,
 * `c` (typically src addr, dst addr, and htonl(proto + length), with an
 * optional payload sum folded into `c`).  The result is NOT
 * complemented.
 *
 * fix: rewritten with explicit shifts — the former REDUCE16/q_util
 * punning depended on little-endian union layout; this computes the
 * identical value portably.
 */
uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	uint64_t sum;

	sum = (uint64_t)a + b + c;
	/* fold the four 16-bit words of the 64-bit sum */
	sum = (sum & 0xffff) + ((sum >> 16) & 0xffff) +
	    ((sum >> 32) & 0xffff) + ((sum >> 48) & 0xffff);
	/* fold the remaining carries (end-around carry) */
	while ((sum >> 16) != 0) {
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return (uint16_t)sum;
}
1595
/*
 * 16-bit one's-complement sum over the IPv6 pseudo-header: both
 * 128-bit addresses plus the caller-supplied word `x`, which could be
 * htonl(proto + length) or htonl(proto + length + sum).  The result is
 * not complemented.
 */
uint16_t
in6_pseudo(const struct in6_addr *src, const struct in6_addr *dst, uint32_t x)
{
	uint32_t acc = x;
	const uint16_t *w;
	int i;

	/* sum the eight 16-bit words of the source address */
	w = (const uint16_t *)src;
	for (i = 0; i < 8; i++) {
		acc += w[i];
	}

	/* ... and of the destination address */
	w = (const uint16_t *)dst;
	for (i = 0; i < 8; i++) {
		acc += w[i];
	}

	/* fold in carry bits */
	while ((acc >> 16) != 0) {
		acc = (acc >> 16) + (acc & 0xffff);
	}

	return acc;
}
1631
/*
 * Return a process-wide monotonically increasing IPv4 identification
 * value, wrapping at 16 bits.  Not thread-safe (test-tool use only).
 *
 * fix: `(void)` prototype (the old `()` declared an unprototyped
 * function); static counter renamed so it no longer shadows the
 * function name.
 */
uint16_t
sktu_ip_id(void)
{
	static int next_ip_id;
	return (uint16_t)next_ip_id++;
}
1638
/*
 * Open a channel on (instance, nx_port) and populate `ch_port` with the
 * channel, its fd, and its first TX/RX rings.  On failure `ch_port` is
 * left zeroed (ch_port->chan == NULL).
 */
void
sktu_channel_port_init(channel_port_t ch_port, uuid_t instance,
    nexus_port_t nx_port, bool enable_upp, bool enable_event_ring,
    bool low_latency)
{
	channel_t ch;

	bzero(ch_port, sizeof(*ch_port));

	ch = sktu_channel_create_extended(instance, nx_port,
	    CHANNEL_DIR_TX_RX, CHANNEL_RING_ID_ANY, NULL,
	    -1, -1, -1, -1, -1, -1, enable_upp ? 1 : -1, 1,
	    enable_event_ring ? 1 : -1, low_latency ? 1 : -1);
	if (ch == NULL) {
		SKT_LOG("Can't open channel on port %d, %s\n", nx_port,
		    strerror(errno));
		return;
	}

	T_LOG("Opened port %d\n", nx_port);

	ch_port->chan = ch;
	ch_port->fd = os_channel_get_fd(ch);
	ch_port->port = nx_port;
	ch_port->user_packet_pool = enable_upp;

	/* cache the first TX and RX rings for later use */
	ch_port->tx_ring = os_channel_tx_ring(ch,
	    os_channel_ring_id(ch, CHANNEL_FIRST_TX_RING));
	assert(ch_port->tx_ring != NULL);
	ch_port->rx_ring = os_channel_rx_ring(ch,
	    os_channel_ring_id(ch, CHANNEL_FIRST_RX_RING));
	assert(ch_port->rx_ring != NULL);
}
1675
/*
 * Fold a 32-bit one's-complement accumulator down to 16 bits and return
 * its complement — the final Internet checksum value.
 */
static inline uint16_t
sktu_fold_sum_final(uint32_t sum)
{
	/* repeatedly add the carry back in until none remains */
	while ((sum >> 16) != 0) {
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return ~sum & 0xffff;
}
1684
/*
 * Convert a wire frame into a channel packet allocated from the port's
 * user packet pool (the port must have been opened with enable_upp).
 *
 * Copies frame->bytes across as many buflets as needed, propagates the
 * partial-checksum metadata and flow UUID, finalizes the packet and
 * returns it.  Asserts on any allocation or API failure.
 */
packet_t
sktu_channel_port_frame_to_pkt(channel_port_t port, struct sktu_frame *frame)
{
	int error;
	packet_t pkt;
	void *baddr, *bytes = &frame->bytes[0];
	size_t len = frame->len;
	buflet_t buf, pbuf = NULL;
	uint16_t clen, bdlim, blen, bcnt;

	assert(port->user_packet_pool);

	error = os_channel_packet_alloc(port->chan, &pkt);
	SKTC_ASSERT_ERR(error == 0);
	assert(pkt != 0);

	/* first buflet: start writing at offset 0 */
	buf = os_packet_get_next_buflet(pkt, NULL);
	assert(buf != NULL);
	error = os_buflet_set_data_offset(buf, 0);
	SKTC_ASSERT_ERR(error == 0);
	bdlim = blen = os_buflet_get_data_limit(buf);
	assert(bdlim != 0);
	bcnt = os_packet_get_buflet_count(pkt);
	/* the packet's buflets must be able to hold the whole frame */
	assert(blen * bcnt >= len);
	baddr = os_buflet_get_object_address(buf);
	assert(baddr != NULL);

	error = os_packet_set_link_header_length(pkt, 0);
	SKTC_ASSERT_ERR(error == 0);

	/* copy the frame bytes */
	while (len != 0) {
		if (blen == 0) {
			/* current buflet is full: record its final length
			 * and advance to the next buflet */
			error = os_buflet_set_data_length(buf, bdlim);
			SKTC_ASSERT_ERR(error == 0);
			pbuf = buf;
			buf = os_packet_get_next_buflet(pkt, pbuf);
			assert(buf != NULL);
			error = os_buflet_set_data_offset(buf, 0);
			SKTC_ASSERT_ERR(error == 0);
			baddr = os_buflet_get_object_address(buf);
			assert(baddr != NULL);
			bdlim = blen = os_buflet_get_data_limit(buf);
		}
		clen = MIN(blen, len);
		memcpy(baddr, bytes, clen);
		len -= clen;
		blen -= clen;
		bytes += clen;
		baddr += clen;
		assert(len == 0 || blen == 0);
	}
	/* propagate partial-checksum offload metadata, if any */
	if (frame->csum_flags != 0) {
		os_packet_set_inet_checksum(pkt, frame->csum_flags,
		    frame->csum_start, frame->csum_stuff);
	}
	/* pbuf == NULL means the whole frame fit in the first buflet;
	 * otherwise the last buflet holds only the final chunk (clen) */
	if (pbuf == NULL) {
		error = os_buflet_set_data_length(buf, frame->len);
	} else {
		error = os_buflet_set_data_length(buf, clen);
	}
	SKTC_ASSERT_ERR(error == 0);

	os_packet_set_flow_uuid(pkt, frame->flow_uuid);
	error = os_packet_finalize(pkt);
	SKTC_ASSERT_ERR(error == 0);
	return pkt;
}
1753
1754 int
sktu_channel_port_tx(channel_port_t port,packet_t pkt)1755 sktu_channel_port_tx(channel_port_t port, packet_t pkt)
1756 {
1757 int error;
1758 slot_prop_t prop;
1759 channel_slot_t slot;
1760
1761 slot = os_channel_get_next_slot(port->tx_ring, NULL, &prop);
1762 if (slot == NULL) {
1763 return ENOENT;
1764 }
1765 error = os_channel_slot_attach_packet(port->tx_ring, slot, pkt);
1766 SKTC_ASSERT_ERR(error == 0);
1767 error = os_channel_advance_slot(port->tx_ring, slot);
1768 SKTC_ASSERT_ERR(error == 0);
1769 return 0;
1770 }
1771
1772 /*
1773 * Burst Tx tries to tx as many it can in one shot.
1774 *
1775 * Returns number of actually completed Tx.
1776 */
uint32_t
sktu_channel_port_tx_burst_pkt(channel_port_t port, packet_t *pkts,
    uint32_t n)
{
	struct timespec timeout = {
		.tv_sec = 10,
		.tv_nsec = 0,
	};
	struct kevent evlist, kev;
	int kq;
	int error;
	uint32_t i = 0;	/* number of packets actually transmitted */

	kq = kqueue();
	assert(kq != -1);

	EV_SET(&kev, port->fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
	error = kevent(kq, &kev, 1, NULL, 0, NULL);
	SKTC_ASSERT_ERR(error == 0);

	/* wait for Tx to become available */
	error = kevent(kq, NULL, 0, &evlist, 1, &timeout);
	if (error <= 0) {
		if (errno == EAGAIN) {
			goto done;
		}
		SKTC_ASSERT_ERR(error == 0);
	}
	if (error == 0) {
		T_LOG("kevent timeout\n");
		goto done;
	}
	if (evlist.flags & EV_ERROR) {
		int kev_err = evlist.data;
		if (kev_err == EAGAIN) {
			goto done;
		}
		SKTC_ASSERT_ERR(kev_err == 0);
	}

	if (evlist.filter != EVFILT_WRITE) {
		err(EX_OSERR, "%lu event %d?\n", evlist.ident, evlist.filter);
	}

	/* transmit until the ring fills or all packets are queued */
	for (i = 0; i < n; i++) {
		error = sktu_channel_port_tx(port, pkts[i]);
		if (error != 0) {
			break;
		}
	}

	if (i != 0) {
		error = os_channel_sync(port->chan, CHANNEL_SYNC_TX);
		SKTC_ASSERT_ERR(error == 0);
	}

done:
	/* fix: close the kqueue on every path — it was leaked on each
	 * call (the RX counterpart already closed its kq) */
	close(kq);
	return i;
}
1835
1836 /*
1837 * Burst Tx tries to tx as many it can in one shot.
1838 *
1839 * Returns number of actually completed Tx.
1840 */
uint32_t
sktu_channel_port_tx_burst(channel_port_t port, struct sktu_frame **frames,
    uint32_t n)
{
	struct timespec timeout = {
		.tv_sec = 10,
		.tv_nsec = 0,
	};
	struct kevent evlist, kev;
	int kq;
	int error;
	uint32_t i = 0;	/* number of frames actually transmitted */
	packet_t pkt;

	kq = kqueue();
	assert(kq != -1);

	EV_SET(&kev, port->fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
	error = kevent(kq, &kev, 1, NULL, 0, NULL);
	SKTC_ASSERT_ERR(error == 0);

	/* wait for Tx to become available */
	error = kevent(kq, NULL, 0, &evlist, 1, &timeout);
	if (error <= 0) {
		if (errno == EAGAIN) {
			goto done;
		}
		SKTC_ASSERT_ERR(error == 0);
	}
	if (error == 0) {
		T_LOG("kevent timeout\n");
		goto done;
	}
	if (evlist.flags & EV_ERROR) {
		int kev_err = evlist.data;
		if (kev_err == EAGAIN) {
			goto done;
		}
		SKTC_ASSERT_ERR(kev_err == 0);
	}

	if (evlist.filter != EVFILT_WRITE) {
		err(EX_OSERR, "%lu event %d?\n", evlist.ident, evlist.filter);
	}

	/* convert each frame to a packet and queue it until the ring fills */
	for (i = 0; i < n; i++) {
		pkt = sktu_channel_port_frame_to_pkt(port, frames[i]);
		error = sktu_channel_port_tx(port, pkt);
		if (error != 0) {
			break;
		}
	}

	if (i != 0) {
		error = os_channel_sync(port->chan, CHANNEL_SYNC_TX);
		SKTC_ASSERT_ERR(error == 0);
	}

done:
	/* fix: close the kqueue on every path — it was leaked on each
	 * call (the RX counterpart already closed its kq) */
	close(kq);
	return i;
}
1901
1902 /*
1903 * Bulk Tx makes sure all Tx operations are completed; otherwise fails the test.
1904 */
1905 void
sktu_channel_port_tx_bulk(channel_port_t port,struct sktu_frame ** frames,uint32_t n)1906 sktu_channel_port_tx_bulk(channel_port_t port, struct sktu_frame **frames,
1907 uint32_t n)
1908 {
1909 uint32_t ret = 0;
1910 ret = sktu_channel_port_tx_burst(port, frames, n);
1911 assert(ret < n);
1912 if (ret != n) {
1913 errx(EX_OSERR, "tx bulk failed %u/%u", n, ret);
1914 }
1915 }
1916
1917 int
sktu_parse_ipv4_frame(struct sktu_frame * frame,void * ip_payload,uint32_t * ip_payload_len)1918 sktu_parse_ipv4_frame(struct sktu_frame *frame, void *ip_payload,
1919 uint32_t *ip_payload_len)
1920 {
1921 size_t pkt_len, payload_len;
1922 void *buf;
1923 struct ip *ip;
1924 uint16_t csum;
1925
1926 buf = &frame->bytes[0];
1927 ip = (struct ip*)buf;
1928 pkt_len = frame->len;
1929 assert(pkt_len == ntohs(ip->ip_len));
1930 payload_len = pkt_len - sizeof(*ip);
1931 assert(payload_len <= SKTU_FRAME_BUF_SIZE);
1932
1933 /* verify ip header checksum */
1934 csum = in_cksum(ip, sizeof(*ip), 0);
1935 if (csum != 0) {
1936 sktu_dump_buffer(stderr, __func__, buf, pkt_len);
1937 errx(EX_PROTOCOL, "IP header checksum invalid");
1938 }
1939
1940 if (ip_payload != NULL) { /* copy the data */
1941 memcpy(ip_payload, buf + sizeof(*ip), pkt_len - sizeof(*ip));
1942 }
1943
1944 *ip_payload_len = payload_len;
1945 return 0;
1946 }
1947
/*
 * Validate a TCP/IPv4 frame and optionally copy out the TCP payload.
 *
 * Checks the total IP length, the IP header checksum, and the TCP
 * checksum over the pseudo-header.  NOTE(review): assumes 20-byte IP
 * and TCP headers (no options) — confirm callers never send options.
 *
 * Returns 0 on success, EINVAL for a non-TCP packet, -1 for a bad TCP
 * checksum; aborts the test on a bad IP header checksum.
 */
int
sktu_parse_tcp4_frame(struct sktu_frame *frame, void *tcp_payload,
    uint32_t *tcp_payload_len)
{
	uint32_t pkt_len, payload_len;
	void *buf;
	struct ip *ip;
	ip_tcp_header_t *ip_tcp;
	uint16_t csum;

	buf = &frame->bytes[0];
	ip = buf;
	ip_tcp = buf;
	pkt_len = frame->len;
	if (ip->ip_p != IPPROTO_TCP) {
		sktu_dump_buffer(stderr, "non-TCP packet", buf, pkt_len);
		return EINVAL;
	}
	assert(pkt_len == ntohs(ip_tcp->ip.ip_len));
	payload_len = pkt_len - sizeof(ip_tcp_header_t);
	assert(payload_len <= SKTU_FRAME_BUF_SIZE);

	/* a valid IP header checksums to zero */
	csum = in_cksum(ip, sizeof(*ip), 0);
	if (csum != 0) {
		sktu_dump_buffer(stderr, __func__, buf, pkt_len);
		errx(EX_PROTOCOL, "IP header checksum invalid");
	}

	/* TCP checksum: sum the segment, then fold in the pseudo-header
	 * (src, dst, htonl(length + proto)) as a 32-bit quantity so the
	 * end-around carry is preserved; a valid checksum yields 0xffff
	 * before the final complement */
	csum = os_inet_checksum(&ip_tcp->tcp, pkt_len - sizeof(struct ip), 0);
	csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    csum + htonl(payload_len + sizeof(struct tcphdr) + IPPROTO_TCP));
	csum ^= 0xffff;
	if (csum != 0) {
		sktu_dump_buffer(stderr, "invalid TCP csum", buf, pkt_len);
		return -1;
	}

	if (tcp_payload != NULL) { /* copy the data */
		memcpy(tcp_payload, buf + sizeof(*ip_tcp), payload_len);
	}

	*tcp_payload_len = payload_len;

	return 0;
}
1993
1994 int
sktu_parse_udp4_frame(struct sktu_frame * frame,void * udp_payload,uint32_t * udp_payload_len)1995 sktu_parse_udp4_frame(struct sktu_frame *frame, void *udp_payload,
1996 uint32_t *udp_payload_len)
1997 {
1998 size_t pkt_len, payload_len;
1999 void *buf;
2000 struct ip *ip;
2001 ip_udp_header_t *ip_udp;
2002 uint16_t csum;
2003
2004 buf = &frame->bytes[0];
2005 ip = buf;
2006 ip_udp = buf;
2007 pkt_len = frame->len;
2008 if (ip->ip_p != IPPROTO_UDP) {
2009 sktu_dump_buffer(stderr,
2010 "sktu_parse_udp4_frame: non-UDP packet", buf, pkt_len);
2011 return EINVAL;
2012 }
2013 assert(pkt_len == ntohs(ip_udp->ip.ip_len));
2014 payload_len = pkt_len - sizeof(ip_udp_header_t);
2015 assert(payload_len <= SKTU_FRAME_BUF_SIZE);
2016
2017 csum = in_cksum(ip, sizeof(*ip), 0);
2018 if (csum != 0) {
2019 sktu_dump_buffer(stderr, __func__, buf, pkt_len);
2020 errx(EX_PROTOCOL, "IP header checksum invalid");
2021 }
2022
2023 if (ip_udp->udp.uh_sum == 0) {
2024 goto skip_udp_checksum;
2025 }
2026
2027 csum = os_inet_checksum(&ip_udp->udp, pkt_len - sizeof(struct ip), 0);
2028 csum += htons(payload_len + sizeof(struct udphdr) + IPPROTO_UDP);
2029 csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, csum);
2030 csum ^= 0xffff;
2031 if (csum != 0) {
2032 sktu_dump_buffer(stderr, __func__, buf, pkt_len);
2033 return -1;
2034 }
2035
2036 skip_udp_checksum:
2037 if (udp_payload != NULL) {
2038 memcpy(udp_payload, buf + sizeof(*ip_udp), payload_len);
2039 }
2040
2041 *udp_payload_len = payload_len;
2042
2043 return 0;
2044 }
2045
2046 /*
2047 * Rx once from an available ring;
2048 * Return 0, if successful; non-zero, otherwise.
2049 */
2050 struct sktu_frame *
sktu_channel_port_rx(channel_port_t port)2051 sktu_channel_port_rx(channel_port_t port)
2052 {
2053 int error;
2054 slot_prop_t prop;
2055 channel_slot_t slot;
2056 struct sktu_frame *frame;
2057 packet_t pkt;
2058 void *addr, *buf;
2059 size_t buf_len;
2060 size_t frame_length;
2061 buflet_t buflet;
2062
2063 slot = os_channel_get_next_slot(port->rx_ring, NULL, &prop);
2064 if (slot == NULL) {
2065 return NULL;
2066 }
2067 assert(prop.sp_buf_ptr != 0);
2068
2069 frame = sktu_frame_alloc();
2070
2071 pkt = os_channel_slot_get_packet(port->rx_ring, slot);
2072 assert(pkt != 0);
2073 if (port->user_packet_pool) {
2074 error = os_channel_slot_detach_packet(port->rx_ring,
2075 slot, pkt);
2076 SKTC_ASSERT_ERR(error == 0);
2077 }
2078
2079 buflet = os_packet_get_next_buflet(pkt, NULL);
2080 assert(buflet != NULL);
2081 buf = os_buflet_get_object_address(buflet) +
2082 os_buflet_get_data_offset(buflet);
2083 frame_length = os_packet_get_data_length(pkt);
2084
2085 buflet = os_packet_get_next_buflet(pkt, NULL);
2086 assert(buflet != NULL);
2087 buf = os_buflet_get_object_address(buflet) +
2088 os_buflet_get_data_offset(buflet);
2089 buf_len = os_buflet_get_data_length(buflet);
2090 assert(buf_len < SKTU_FRAME_BUF_SIZE);
2091
2092 frame->len = os_packet_get_data_length(pkt);
2093
2094 addr = &frame->bytes[0];
2095 memcpy(addr, buf, buf_len);
2096 frame_length -= buf_len;
2097
2098 while (frame_length != 0) {
2099 buflet = os_packet_get_next_buflet(pkt, buflet);
2100 assert(buflet != NULL);
2101 buf = os_buflet_get_object_address(buflet) +
2102 os_buflet_get_data_offset(buflet);
2103 assert(buf != 0);
2104 buf_len = os_buflet_get_data_length(buflet);
2105 assert(buf_len != 0);
2106 memcpy(addr, buf, buf_len);
2107 addr += buf_len;
2108 frame_length -= buf_len;
2109 }
2110
2111 os_packet_get_flow_uuid(pkt, &frame->flow_uuid);
2112 error = os_channel_packet_free(port->chan, pkt);
2113
2114 error = os_channel_advance_slot(port->rx_ring, slot);
2115 SKTC_ASSERT_ERR(error == 0);
2116
2117 return frame;
2118 }
2119
/*
 * Wait (up to 10s) for the channel fd to become readable, then receive
 * up to n frames into frames[].
 * Returns the number of frames received; 0 on timeout or EAGAIN.
 */
uint32_t
sktu_channel_port_rx_burst(channel_port_t port, struct sktu_frame **frames,
    uint32_t n)
{
	struct timespec timeout = {
		.tv_sec = 10,
		.tv_nsec = 0,
	};

	int error;
	struct kevent evlist, kev;
	int kq;
	uint32_t i = 0;

	kq = kqueue();
	assert(kq != -1);

	EV_SET(&kev, port->fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
	error = kevent(kq, &kev, 1, NULL, 0, NULL);
	SKTC_ASSERT_ERR(error == 0);

	/* wait for RX to become available */
	error = kevent(kq, NULL, 0, &evlist, 1, &timeout);
	if (error <= 0) {
		if (errno == EAGAIN) {
			goto out;	/* BUG FIX: used to leak kq here */
		}
		SKTC_ASSERT_ERR(error == 0);
	}
	if (error == 0) {
		T_LOG("kevent timeout\n");
		goto out;		/* BUG FIX: used to leak kq here */
	}
	if (evlist.flags & EV_ERROR) {
		/* renamed from `err`, which shadowed err(3) */
		int ev_err = (int)evlist.data;
		if (ev_err == EAGAIN) {
			goto out;	/* BUG FIX: used to leak kq here */
		}
		SKTC_ASSERT_ERR(ev_err == 0);
	}

	if (evlist.filter != EVFILT_READ) {
		err(EX_OSERR, "%lu event %d?\n", evlist.ident, evlist.filter);
	}

	for (i = 0; i < n; i++) {
		frames[i] = sktu_channel_port_rx(port);
		if (frames[i] == NULL) {
			break;
		}
	}

	if (i != 0) {
		error = os_channel_sync(port->chan, CHANNEL_SYNC_RX);
		SKTC_ASSERT_ERR(error == 0);
	}

out:
	close(kq);
	return i;
}
2181
2182 void
sktu_channel_port_rx_bulk(channel_port_t port,struct sktu_frame ** frames,uint32_t n)2183 sktu_channel_port_rx_bulk(channel_port_t port, struct sktu_frame **frames,
2184 uint32_t n)
2185 {
2186 uint32_t ret = 0;
2187 ret = sktu_channel_port_rx_burst(port, frames, n);
2188 assert(ret < n);
2189 if (ret != n) {
2190 errx(EX_OSERR, "rx bulk failed, %u/%u packets", n, ret);
2191 }
2192 }
2193
2194 /*
2195 * Received batch of frames from utun file descriptor.
2196 *
2197 * Returns number of frames actually received.
2198 */
2199 uint32_t
sktu_utun_fd_rx_burst(int utun_fd,struct sktu_frame ** frames,uint32_t n)2200 sktu_utun_fd_rx_burst(int utun_fd, struct sktu_frame **frames, uint32_t n)
2201 {
2202 struct timeval timeout = {
2203 .tv_sec = 10,
2204 .tv_usec = 0,
2205 };
2206
2207 fd_set readfds, errorfds;
2208 int retval;
2209
2210 FD_ZERO(&readfds);
2211 FD_ZERO(&errorfds);
2212 FD_SET(utun_fd, &readfds);
2213 FD_SET(utun_fd, &errorfds);
2214
2215 retval = select(utun_fd + 1, &readfds, NULL, &errorfds, &timeout);
2216 if (retval == -1) {
2217 err(EX_OSERR, "select()");
2218 }
2219
2220 if (!FD_ISSET(utun_fd, &readfds) && retval == 0) { // timeout
2221 T_LOG("recv timeout\n");
2222 return 0;
2223 }
2224 assert(!FD_ISSET(utun_fd, &errorfds));
2225 assert(retval == 1);
2226
2227 if (!FD_ISSET(utun_fd, &readfds)) {
2228 errx(EX_OSERR, "fd selected but no read fd available");
2229 }
2230
2231 uint32_t i = 0;
2232 for (i = 0; i < n; i++) {
2233 struct {
2234 uint32_t af;
2235 char bytes[SKTU_FRAME_BUF_SIZE];
2236 } utun_packet;
2237 ssize_t len;
2238 len = read(utun_fd, &utun_packet, sizeof(utun_packet));
2239 if (len < 1) {
2240 errx(EX_OSERR, "utun read 0 len");
2241 }
2242 struct sktu_frame *frame = frames[i] = sktu_frame_alloc();
2243 memcpy(frame->bytes, &utun_packet.bytes, len - sizeof(uint32_t));
2244 frame->len = len - sizeof(uint32_t);
2245 }
2246
2247 return i;
2248 }
2249
2250 void
sktu_utun_fd_tx_burst(int utun_fd,struct sktu_frame ** frames,uint32_t n)2251 sktu_utun_fd_tx_burst(int utun_fd, struct sktu_frame **frames, uint32_t n)
2252 {
2253 struct timeval timeout = {
2254 .tv_sec = 10,
2255 .tv_usec = 0,
2256 };
2257 fd_set writefds, errorfds;
2258 int retval;
2259
2260 FD_ZERO(&writefds);
2261 FD_ZERO(&errorfds);
2262 FD_SET(utun_fd, &writefds);
2263 FD_SET(utun_fd, &errorfds);
2264
2265 retval = select(utun_fd + 1, NULL, &writefds, &errorfds, &timeout);
2266 if (retval == -1) {
2267 err(EX_OSERR, "select()");
2268 }
2269
2270 if (!FD_ISSET(utun_fd, &writefds) && retval == 0) { // timeout
2271 err(EX_OSERR, "recv timeout\n");
2272 }
2273
2274 assert(!FD_ISSET(utun_fd, &errorfds));
2275 assert(retval == 1);
2276
2277 if (!FD_ISSET(utun_fd, &writefds)) {
2278 errx(EX_OSERR, "fd selected but no write fd available");
2279 }
2280
2281 uint32_t i = 0;
2282 for (i = 0; i < n; i++) {
2283 struct sktu_frame *frame = frames[i];
2284 struct ip *ip = (void *)&frame->bytes[0];
2285 uint32_t af;
2286 switch (ip->ip_v) {
2287 case IPVERSION:
2288 af = htonl(AF_INET);
2289 break;
2290 case IPV6_VERSION:
2291 af = htonl(AF_INET6);
2292 break;
2293 default:
2294 assert("unrecoginzed IP version");
2295 __builtin_unreachable();
2296 break;
2297 }
2298 struct {
2299 uint32_t af;
2300 char bytes[SKTU_FRAME_BUF_SIZE];
2301 } utun_packet;
2302 memcpy(&utun_packet.af, &af, sizeof(af));
2303 memcpy(&utun_packet.bytes, &frame->bytes[0], frame->len);
2304 ssize_t write_len = frame->len + sizeof(uint32_t);
2305 T_LOG("%s writing frame len %zu\n", __func__, write_len);
2306 ssize_t len = write(utun_fd, &utun_packet, write_len);
2307 if (len != write_len) {
2308 err(EX_OSERR, "utun write error\n");
2309 }
2310 }
2311 }
2312
2313 struct sktu_frame *
sktu_frame_alloc()2314 sktu_frame_alloc()
2315 {
2316 return malloc(sizeof(struct sktu_frame));
2317 }
2318
/*
 * Free a frame and NULL out the caller's pointer variable to guard
 * against use-after-free / double-free.
 * NOTE: `frame` is evaluated twice and must be an assignable lvalue;
 * pass a plain pointer variable, not an expression with side effects.
 */
#define sktu_frame_free(frame) \
	do { \
	        free(frame); \
	        frame = NULL; \
	} while (0)
2324
/*
 * Free an array of frames and NULL each entry in the caller's array.
 */
void
sktu_frames_free(struct sktu_frame **frames, size_t n)
{
	size_t idx;

	for (idx = 0; idx < n; idx++) {
		/* sktu_frame_free() also NULLs its argument; the explicit
		 * store keeps the intent obvious at the call site. */
		sktu_frame_free(frames[idx]);
		frames[idx] = NULL;
	}
}
2333
/*
 * Split `sdu` (an IP payload carrying protocol `proto`) into one or
 * more IPv4 frames of at most `mtu` bytes each, fragmenting per RFC 791
 * when it does not fit.  frames[i] are allocated here; caller owns them.
 * csum_start/csum_stuff are offsets relative to the start of the SDU;
 * partial-checksum offload is only legal when no fragmentation occurs
 * (asserted below).  Returns the number of frames produced.
 *
 * NOTE(review): assumes mtu > sizeof(struct ip) + 8; if the per-fragment
 * payload rounds down to 0 the loop makes no progress -- confirm callers
 * guarantee a sane mtu.
 */
size_t
sktu_create_ip_frames(struct sktu_frame **frames, size_t n,
    void *src_ip, void *dst_ip, uint8_t proto, const void *sdu, size_t sdu_len,
    size_t mtu, uint16_t csum_flags, uint16_t csum_start, uint16_t csum_stuff)
{
	size_t off = 0, remaining_sdu_len = sdu_len;
	size_t i = 0;
	uint16_t ip_id = sktu_ip_id();	/* one IP ID shared by all fragments */
	bool needs_frag = false;

	while (remaining_sdu_len > 0) {
		assert(i < n);

		struct sktu_frame *frame = frames[i] = sktu_frame_alloc();
		char *baddr = &frame->bytes[0];
		struct ip *ip = (struct ip *)baddr;
		size_t dlen;
		bool more_frag = false;

		/* payload capacity of this fragment */
		dlen = mtu - sizeof(*ip);
		if (dlen >= remaining_sdu_len) {
			/* last (or only) fragment */
			dlen = remaining_sdu_len;
			needs_frag = false;
			more_frag = false;
		} else {
			/* non-final fragments must carry a multiple of 8 bytes */
			dlen = dlen & ~0x7; // round down to 8-byte multiple
			needs_frag = true;
			more_frag = true;
		}

		// can't handle fragmented csum offload
		assert(!(needs_frag && csum_flags != 0));

		memset(ip, 0, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(struct ip) >> 2;	/* header len in 32-bit words */
		ip->ip_ttl = MAXTTL;
		ip->ip_p = proto;
		memcpy(&ip->ip_src, src_ip, sizeof(struct in_addr));
		memcpy(&ip->ip_dst, dst_ip, sizeof(struct in_addr));
		ip->ip_len = htons(sizeof(*ip) + dlen);
		ip->ip_id = htons(ip_id);
		/* fragment offset is expressed in 8-byte units */
		ip->ip_off = ((off >> 3) & IP_OFFMASK);
		if (more_frag) {
			ip->ip_off |= IP_MF;
		}
		ip->ip_off = htons(ip->ip_off);

		/* compute the IP header checksum */
		ip->ip_sum = in_cksum(ip, sizeof(*ip), 0);
		baddr += sizeof(*ip);

		memcpy(baddr, sdu + off, dlen);

		/* offload offsets are relative to the SDU; shift past the IP header */
		frame->csum_flags = csum_flags;
		frame->csum_start = sizeof(*ip) + csum_start;
		frame->csum_stuff = sizeof(*ip) + csum_stuff;

		frame->len = sizeof(*ip) + dlen;

		off += dlen;
		remaining_sdu_len -= dlen;
		i++;
	}

	return i;
}
2401
/*
 * Split `sdu` (an IPv6 payload carrying protocol `proto`) into one or
 * more IPv6 frames of at most `mtu` bytes, inserting a fragment header
 * (RFC 8200 §4.5) when it does not fit.  frames[i] are allocated here;
 * caller owns them.  Partial-checksum offload is only legal when no
 * fragmentation occurs (asserted below).  Returns the frame count.
 *
 * NOTE(review): assumes mtu > sizeof(ip6_hdr) + sizeof(ip6_frag) + 8;
 * otherwise dlen underflows / the loop makes no progress -- confirm
 * callers guarantee this.
 */
size_t
sktu_create_ip6_frames(struct sktu_frame **frames, size_t n,
    void *src_ip, void *dst_ip, uint8_t proto, const void *sdu, size_t sdu_len,
    size_t mtu, uint16_t csum_flags, uint16_t csum_start, uint16_t csum_stuff)
{
	size_t off = 0, remaining_sdu_len = sdu_len;
	size_t i = 0;
	uint16_t ip_id = sktu_ip_id();	/* shared fragment identification */
	bool needs_frag = false;

	while (remaining_sdu_len > 0) {
		assert(i < n);

		struct sktu_frame *frame = frames[i] = sktu_frame_alloc();
		char *baddr = &frame->bytes[0];
		struct ip6_hdr *ip6 = (struct ip6_hdr *)baddr;
		size_t hlen = sizeof(*ip6);
		size_t plen, dlen;	/* plen: ip6_plen value; dlen: SDU bytes */
		bool more_frag = false;

		dlen = mtu - hlen;
		if (dlen >= remaining_sdu_len) {
			// fits in one packet
			dlen = plen = remaining_sdu_len;
			remaining_sdu_len = 0;
			more_frag = false;
		} else {
			// need to fragment
			dlen -= sizeof(struct ip6_frag);
			dlen = dlen & ~0x7; // round down to 8-byte multiple
			plen = sizeof(struct ip6_frag) + dlen;
			remaining_sdu_len -= dlen;
			needs_frag = true;
			more_frag = true;
		}

		// can't handle fragmented csum offload
		assert(!(needs_frag && csum_flags != 0));

		// insert ipv6 header
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc = (IPV6_VERSION & IPV6_VERSION_MASK);
		ip6->ip6_plen = htons(plen);
		/* once fragmenting starts, every packet (including the
		 * final one) carries a fragment header */
		ip6->ip6_nxt = needs_frag ? IPPROTO_FRAGMENT : proto;
		ip6->ip6_hlim = IPV6_DEFHLIM;
		memcpy(&ip6->ip6_src, src_ip, sizeof(struct in6_addr));
		memcpy(&ip6->ip6_dst, dst_ip, sizeof(struct in6_addr));

		baddr += sizeof(*ip6);

		// insert ipv6 frag header
		if (needs_frag) {
			struct ip6_frag *ip6f = (struct ip6_frag *)baddr;
			ip6f->ip6f_nxt = proto;
			ip6f->ip6f_reserved = 0;
			/* off is always a multiple of 8 here, so its low
			 * 3 bits (the flag bits of ip6f_offlg) are clear */
			ip6f->ip6f_offlg = htons(off);
			if (more_frag) {
				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
			}
			ip6f->ip6f_ident = htonl(ip_id);

			hlen += sizeof(*ip6f);
			baddr += sizeof(*ip6f);
		}

		memcpy(baddr, sdu + off, dlen);

		/* offload offsets are relative to the SDU; shift past the
		 * fixed IPv6 header (not the fragment header) */
		frame->csum_flags = csum_flags;
		frame->csum_start = sizeof(*ip6) + csum_start;
		frame->csum_stuff = sizeof(*ip6) + csum_stuff;
		frame->len = hlen + dlen;

		off += dlen;
		i++;
	}

	return i;
}
2480
2481 size_t
sktu_create_tcp_frames(struct sktu_frame ** frames,size_t n,uint8_t ipver,void * src_ip,void * dst_ip,uint16_t sport,uint16_t dport,const void * data,size_t data_len,size_t mtu,bool csum_offload)2482 sktu_create_tcp_frames(struct sktu_frame **frames, size_t n,
2483 uint8_t ipver, void *src_ip, void *dst_ip, uint16_t sport, uint16_t dport,
2484 const void *data, size_t data_len, size_t mtu, bool csum_offload)
2485 {
2486 uint32_t n_frames;
2487 size_t sdu_len = data_len + sizeof(struct tcphdr);
2488 void *sdu = malloc(sdu_len);
2489
2490 // populate header
2491 struct tcphdr *tcp = (struct tcphdr *)sdu;
2492 tcp->th_sport = htons(sport);
2493 tcp->th_dport = htons(dport);
2494 tcp->th_flags |= 0; //FIXME (connect ? TH_SYN : TH_RST);
2495 tcp->th_off = (sizeof(struct tcphdr)) >> 2;
2496
2497 // copy payload
2498 memcpy(sdu + sizeof(*tcp), data, data_len);
2499
2500 // compute checksum
2501 uint16_t sum = 0;
2502
2503 if (ipver == IPVERSION) {
2504 sum = in_pseudo(*(uint32_t*)src_ip, *(uint32_t*)dst_ip,
2505 htons(data_len + sizeof(struct tcphdr) + IPPROTO_TCP));
2506 } else {
2507 sum = in6_pseudo(src_ip, dst_ip,
2508 htonl(data_len + sizeof(struct tcphdr) + IPPROTO_TCP));
2509 }
2510 tcp->th_sum = sum;
2511
2512 uint16_t csum_flags = 0, csum_start = 0, csum_stuff = 0;
2513 if (csum_offload) {
2514 csum_flags = PACKET_CSUM_PARTIAL;
2515 csum_start = 0;
2516 csum_stuff = offsetof(struct tcphdr, th_sum);
2517 } else {
2518 sum = os_inet_checksum(sdu, sdu_len, 0);
2519 tcp->th_sum = sktu_fold_sum_final(sum);
2520 }
2521
2522 // IP framing
2523 if (ipver == IPVERSION) {
2524 n_frames = sktu_create_ip_frames(frames, n, src_ip, dst_ip,
2525 IPPROTO_TCP, sdu, sdu_len, mtu, csum_flags, csum_start,
2526 csum_stuff);
2527 } else {
2528 n_frames = sktu_create_ip6_frames(frames, n, src_ip, dst_ip,
2529 IPPROTO_TCP, sdu, sdu_len, mtu, csum_flags, csum_start,
2530 csum_stuff);
2531 }
2532
2533 free(sdu);
2534
2535 return n_frames;
2536 }
2537
/*
 * Build one or more IP(v4/v6) frames carrying a UDP datagram with the
 * given ports and payload.  With csum_offload, uh_sum carries the
 * pseudo-header partial sum plus PACKET_CSUM_PARTIAL metadata;
 * otherwise the full UDP checksum is computed in software.
 * Returns the number of frames produced.
 */
size_t
sktu_create_udp_frames(struct sktu_frame **frames, size_t n,
    uint8_t ipver, void *src_ip, void *dst_ip, uint16_t sport, uint16_t dport,
    const void *data, size_t data_len, size_t mtu, bool csum_offload)
{
	uint32_t n_frames;
	size_t sdu_len = data_len + sizeof(struct udphdr);
	void *sdu = malloc(sdu_len);	/* NOTE(review): unchecked malloc */

	// populate header
	struct udphdr *udp = (struct udphdr *)sdu;
	udp->uh_sport = htons(sport);
	udp->uh_dport = htons(dport);
	udp->uh_ulen = htons(sizeof(*udp) + data_len);

	// compute payload checksum
	uint32_t payload_sum = 0, pseudo_sum = 0;
	if (ipver == IPVERSION) {
		struct ipv4_udp_pseudo_hdr udp_pseudo = {};
		memcpy(&udp_pseudo.src_ip, src_ip, sizeof(struct in_addr));
		memcpy(&udp_pseudo.dst_ip, dst_ip, sizeof(struct in_addr));
		udp_pseudo.proto = IPPROTO_UDP;
		udp_pseudo.length = htons(sizeof(struct udphdr) + data_len);
		/*
		 * NOTE(review): the checksummed length is
		 * sizeof(udp_pseudo) + sizeof(struct udphdr).  Unless
		 * ipv4_udp_pseudo_hdr is declared with the UDP header
		 * laid out directly after the pseudo-header fields, this
		 * reads past the local struct -- verify against the
		 * struct definition.
		 */
		pseudo_sum = os_inet_checksum(&udp_pseudo, sizeof(udp_pseudo)
		    + sizeof(struct udphdr), 0);
	} else {
		struct ipv6_udp_pseudo_hdr udp_pseudo = {};
		memcpy(&udp_pseudo.src_ip, src_ip, sizeof(struct in6_addr));
		memcpy(&udp_pseudo.dst_ip, dst_ip, sizeof(struct in6_addr));
		udp_pseudo.proto = IPPROTO_UDP;
		udp_pseudo.length = htons(sizeof(struct udphdr) + data_len);
		/* NOTE(review): same length concern as the IPv4 branch */
		pseudo_sum = os_inet_checksum(&udp_pseudo, sizeof(udp_pseudo)
		    + sizeof(struct udphdr), 0);
	}

	uint16_t csum_flags = 0, csum_start = 0, csum_stuff = 0;
	if (csum_offload) {
		/* ZERO_INVERT: a computed 0 must be sent as 0xffff for UDP */
		csum_flags = PACKET_CSUM_PARTIAL | PACKET_CSUM_ZERO_INVERT;
		csum_start = 0;
		csum_stuff = offsetof(struct udphdr, uh_sum);
		udp->uh_sum = sktu_fold_sum_final(pseudo_sum);
	} else {
		payload_sum = os_inet_checksum(data, data_len, 0);
		udp->uh_sum = ~sktu_fold_sum_final(pseudo_sum + payload_sum);
	}

	// copy payload
	memcpy(sdu + sizeof(*udp), data, data_len);

	// IP framing
	if (ipver == IPVERSION) {
		n_frames = sktu_create_ip_frames(frames, n, src_ip, dst_ip,
		    IPPROTO_UDP, sdu, sdu_len, mtu, csum_flags, csum_start,
		    csum_stuff);
	} else {
		n_frames = sktu_create_ip6_frames(frames, n, src_ip, dst_ip,
		    IPPROTO_UDP, sdu, sdu_len, mtu, csum_flags, csum_start,
		    csum_stuff);
	}

	free(sdu);

	return n_frames;
}
2602
/*
 * Stamp each frame with the flow's UUID so it can be matched to the
 * flow entry later.
 */
void
sktu_attach_flow_metadata_to_frames(struct sktu_flow *flow,
    struct sktu_frame **frames, size_t n_frames)
{
	/* BUG FIX: the loop counter was uint32_t while n_frames is
	 * size_t; for n_frames > UINT32_MAX the loop never terminated. */
	for (size_t i = 0; i < n_frames; i++) {
		struct sktu_frame *frame = frames[i];
		uuid_copy(frame->flow_uuid, flow->uuid);
	}
}
2612
2613 static size_t
_sktu_create_udp_flow_input_frames(struct sktu_flow * flow,struct sktu_frame ** frames,size_t n,const void * data,size_t data_len)2614 _sktu_create_udp_flow_input_frames(struct sktu_flow *flow,
2615 struct sktu_frame **frames, size_t n, const void *data, size_t data_len)
2616 {
2617 n = sktu_create_udp_frames(frames, n, flow->ipver, flow->dst_ip,
2618 flow->src_ip, flow->dport, flow->sport, data, data_len, flow->mtu,
2619 NO_CSUM_OFFLOAD);
2620 sktu_attach_flow_metadata_to_frames(flow, frames, n);
2621 return n;
2622 }
2623
2624 static size_t
_sktu_create_udp_flow_output_frames(struct sktu_flow * flow,struct sktu_frame ** frames,size_t n,const void * data,size_t data_len,bool csum_offload)2625 _sktu_create_udp_flow_output_frames(struct sktu_flow *flow,
2626 struct sktu_frame **frames, size_t n, const void *data, size_t data_len,
2627 bool csum_offload)
2628 {
2629 n = sktu_create_udp_frames(frames, n, flow->ipver, flow->src_ip,
2630 flow->dst_ip, flow->sport, flow->dport, data, data_len, flow->mtu,
2631 csum_offload);
2632 sktu_attach_flow_metadata_to_frames(flow, frames, n);
2633 return n;
2634 }
2635
2636 static size_t
_sktu_create_tcp_flow_input_frames(struct sktu_flow * flow,struct sktu_frame ** frames,size_t n,const void * data,size_t data_len)2637 _sktu_create_tcp_flow_input_frames(struct sktu_flow *flow,
2638 struct sktu_frame **frames, size_t n, const void *data, size_t data_len)
2639 {
2640 n = sktu_create_tcp_frames(frames, n, flow->ipver, flow->dst_ip,
2641 flow->src_ip, flow->dport, flow->sport, data, data_len, flow->mtu,
2642 NO_CSUM_OFFLOAD);
2643 sktu_attach_flow_metadata_to_frames(flow, frames, n);
2644 return n;
2645 }
2646
2647 static size_t
_sktu_create_tcp_flow_output_frames(struct sktu_flow * flow,struct sktu_frame ** frames,size_t n,const void * data,size_t data_len,bool csum_offload)2648 _sktu_create_tcp_flow_output_frames(struct sktu_flow *flow,
2649 struct sktu_frame **frames, size_t n, const void *data, size_t data_len,
2650 bool csum_offload)
2651 {
2652 n = sktu_create_tcp_frames(frames, n, flow->ipver, flow->src_ip,
2653 flow->dst_ip, flow->sport, flow->dport, data, data_len, flow->mtu,
2654 csum_offload);
2655 sktu_attach_flow_metadata_to_frames(flow, frames, n);
2656 return n;
2657 }
2658
2659 static size_t
_sktu_create_ip_flow_input_frames(struct sktu_flow * flow,struct sktu_frame ** frames,size_t n,const void * data,size_t data_len)2660 _sktu_create_ip_flow_input_frames(struct sktu_flow *flow,
2661 struct sktu_frame **frames, size_t n, const void *data, size_t data_len)
2662 {
2663 n = sktu_create_ip_frames(frames, n, flow->dst_ip, flow->src_ip,
2664 flow->ip_protocol, data, data_len, flow->mtu, 0, 0, 0);
2665 sktu_attach_flow_metadata_to_frames(flow, frames, n);
2666 return n;
2667 }
2668
2669 static size_t
_sktu_create_ip_flow_output_frames(struct sktu_flow * flow,struct sktu_frame ** frames,size_t n,const void * data,size_t data_len,bool csum_offload)2670 _sktu_create_ip_flow_output_frames(struct sktu_flow *flow,
2671 struct sktu_frame **frames, size_t n, const void *data,
2672 size_t data_len, bool csum_offload)
2673 {
2674 n = sktu_create_ip_frames(frames, n, flow->src_ip, flow->dst_ip,
2675 flow->ip_protocol, data, data_len, flow->mtu, 0, 0, 0);
2676 sktu_attach_flow_metadata_to_frames(flow, frames, n);
2677 return n;
2678 }
2679
#define SKTU_STRING_BUF_MAX 2048
/*
 * Format an nx_flow_req as a human-readable one-liner.
 * Returns a pointer to a shared static buffer: not thread-safe, and the
 * result is invalidated by the next call.
 *
 * BUG FIX: the address buffers were 31 bytes, but an IPv6 address
 * string needs up to INET6_ADDRSTRLEN (46) bytes including the NUL;
 * inet_ntop() fails with ENOSPC for long v6 addresses, leaving the
 * buffer uninitialized and feeding garbage to snprintf().
 */
char *
sktu_nfr_to_string(struct nx_flow_req *nfr)
{
	static char buf[SKTU_STRING_BUF_MAX];
	uuid_string_t uuidstr;
	char sa_buf[INET6_ADDRSTRLEN];
	char da_buf[INET6_ADDRSTRLEN];

	uuid_unparse(nfr->nfr_flow_uuid, uuidstr);
	if (nfr->nfr_saddr.sa.sa_family == AF_INET) {
		inet_ntop(AF_INET, &nfr->nfr_saddr.sin.sin_addr.s_addr, sa_buf,
		    sizeof(sa_buf));
		inet_ntop(AF_INET, &nfr->nfr_daddr.sin.sin_addr.s_addr, da_buf,
		    sizeof(da_buf));
	} else {
		inet_ntop(AF_INET6, &nfr->nfr_saddr.sin6.sin6_addr, sa_buf,
		    sizeof(sa_buf));
		inet_ntop(AF_INET6, &nfr->nfr_daddr.sin6.sin6_addr, da_buf,
		    sizeof(da_buf));
	}
	/* ports are read through the sin view for both families; this
	 * relies on sin_port and sin6_port sharing the same offset, which
	 * holds for the BSD sockaddr layouts */
	snprintf(buf, sizeof(buf),
	    "nx_port[%d] %s src=%s,dst=%s,proto=%d,sport=%d,dport=%d, flags=0x%x",
	    nfr->nfr_nx_port, uuidstr, sa_buf, da_buf, nfr->nfr_ip_protocol,
	    ntohs(nfr->nfr_saddr.sin.sin_port),
	    ntohs(nfr->nfr_daddr.sin.sin_port), nfr->nfr_flags);

	return buf;
}
2709
2710 char *
sktu_flow_to_string(struct sktu_flow * flow)2711 sktu_flow_to_string(struct sktu_flow *flow)
2712 {
2713 return sktu_nfr_to_string(&flow->nfr);
2714 }
2715
/*
 * Allocate a flow object, fill in an nx_flow_req for the given 5-tuple,
 * and register it with the nexus flowswitch via __os_nexus_flow_add().
 * Returns the new flow (caller destroys it with _sktu_destroy_nexus_flow)
 * or NULL if the kernel rejected the request.
 *
 * NOTE(review): malloc result is unchecked; memset below would crash on
 * allocation failure.
 */
struct sktu_flow *
_sktu_create_nexus_flow(sktu_nexus_t nexus, nexus_port_t nx_port,
    uint8_t af, void *src, void *dst, uint8_t proto, uint16_t sport,
    uint16_t dport, uint32_t flags)
{
	struct sktu_flow *flow = malloc(sizeof(*flow));

	memset(flow, 0, sizeof(*flow));
	flow->nexus = nexus;
	flow->mtu = 1500;	/* default Ethernet-style MTU */

	flow->nx_port = nx_port;

	struct nx_flow_req *nfr = &flow->nfr;
	union sockaddr_in_4_6 *saddr = &nfr->nfr_saddr;
	union sockaddr_in_4_6 *daddr = &nfr->nfr_daddr;
	nfr->nfr_nx_port = nx_port;
	if (af == AF_INET) {
		// initialize flow
		flow->ipver = IPVERSION;
		// fill in nfr (stuff in network order :)
		SIN(saddr)->sin_len = sizeof(struct sockaddr_in);
		SIN(daddr)->sin_len = sizeof(struct sockaddr_in);
		SIN(saddr)->sin_family = AF_INET;
		SIN(daddr)->sin_family = AF_INET;
		SIN(saddr)->sin_addr = *(struct in_addr *)src;
		SIN(daddr)->sin_addr = *(struct in_addr *)dst;
		nfr->nfr_ip_protocol = proto;
		SIN(saddr)->sin_port = htons(sport);
		SIN(daddr)->sin_port = htons(dport);
	} else {
		flow->ipver = IPV6_VERSION;
		SIN6(saddr)->sin6_len = sizeof(struct sockaddr_in6);
		SIN6(daddr)->sin6_len = sizeof(struct sockaddr_in6);
		SIN6(saddr)->sin6_family = AF_INET6;
		SIN6(daddr)->sin6_family = AF_INET6;
		SIN6(saddr)->sin6_addr = *(struct in6_addr *)src;
		SIN6(daddr)->sin6_addr = *(struct in6_addr *)dst;
		nfr->nfr_ip_protocol = proto;
		SIN6(saddr)->sin6_port = htons(sport);
		SIN6(daddr)->sin6_port = htons(dport);
	}

	uuid_generate_random(nfr->nfr_flow_uuid);
	nfr->nfr_flags = flags;

	errno = 0;
	int error = __os_nexus_flow_add(nexus->controller, nexus->fsw_nx_uuid, nfr);
	if (error) {
		T_LOG("Failed flow %s\n", sktu_nfr_to_string(nfr));
		free(flow);
		return NULL;
	}

	/*
	 * Convenience pointers/values; note src_ip/dst_ip alias storage
	 * inside flow->nfr, so they remain valid for the flow's lifetime
	 * but must not be freed separately.
	 */
	if (af == AF_INET) {
		flow->src_ip = &SIN(saddr)->sin_addr;
		flow->dst_ip = &SIN(daddr)->sin_addr;
		flow->sport = ntohs(SIN(saddr)->sin_port);
		flow->dport = ntohs(SIN(daddr)->sin_port);
	} else {
		flow->src_ip = &SIN6(saddr)->sin6_addr;
		flow->dst_ip = &SIN6(daddr)->sin6_addr;
		flow->sport = ntohs(SIN6(saddr)->sin6_port);
		flow->dport = ntohs(SIN6(daddr)->sin6_port);
	}

	flow->ip_protocol = proto;
	uuid_copy(flow->uuid, nfr->nfr_flow_uuid);

	/* pick per-protocol frame constructors; raw IP is the fallback */
	switch (proto) {
	case IPPROTO_UDP:
		flow->create_input_frames = _sktu_create_udp_flow_input_frames;
		flow->create_output_frames = _sktu_create_udp_flow_output_frames;
		break;
	case IPPROTO_TCP:
		flow->create_input_frames = _sktu_create_tcp_flow_input_frames;
		flow->create_output_frames = _sktu_create_tcp_flow_output_frames;
		break;
	default:
		flow->create_input_frames = _sktu_create_ip_flow_input_frames;
		flow->create_output_frames = _sktu_create_ip_flow_output_frames;
	}

	/* the kernel assigns a concrete port when NEXUS_PORT_ANY was requested */
	assert(nfr->nfr_nx_port != NEXUS_PORT_ANY);

	T_LOG("Created flow %s\n", sktu_nfr_to_string(nfr));

	return flow;
}
2805
2806 struct sktu_flow *
sktu_create_nexus_flow(sktu_nexus_t nexus,uint8_t af,void * src,void * dst,uint8_t proto,uint16_t sport,uint16_t dport)2807 sktu_create_nexus_flow(sktu_nexus_t nexus, uint8_t af, void *src, void *dst,
2808 uint8_t proto, uint16_t sport, uint16_t dport)
2809 {
2810 return _sktu_create_nexus_flow(nexus, NEXUS_PORT_ANY, af, src, dst, proto, sport, dport, 0);
2811 }
2812
2813 struct sktu_flow *
sktu_create_nexus_flow_with_nx_port(sktu_nexus_t nexus,nexus_port_t nx_port,uint8_t af,void * src,void * dst,uint8_t proto,uint16_t sport,uint16_t dport)2814 sktu_create_nexus_flow_with_nx_port(sktu_nexus_t nexus, nexus_port_t nx_port,
2815 uint8_t af, void *src, void *dst, uint8_t proto, uint16_t sport,
2816 uint16_t dport)
2817 {
2818 return _sktu_create_nexus_flow(nexus, nx_port, af, src, dst, proto, sport, dport, 0);
2819 }
2820
2821 struct sktu_flow *
sktu_create_nexus_low_latency_flow(sktu_nexus_t nexus,uint8_t af,void * src,void * dst,uint8_t proto,uint16_t sport,uint16_t dport)2822 sktu_create_nexus_low_latency_flow(sktu_nexus_t nexus, uint8_t af, void *src, void *dst,
2823 uint8_t proto, uint16_t sport, uint16_t dport)
2824 {
2825 return _sktu_create_nexus_flow(nexus, NEXUS_PORT_ANY, af, src, dst, proto, sport, dport, NXFLOWREQF_LOW_LATENCY);
2826 }
2827
2828 void
_sktu_destroy_nexus_flow(struct sktu_flow * flow)2829 _sktu_destroy_nexus_flow(struct sktu_flow *flow)
2830 {
2831 sktu_nexus_t nexus = flow->nexus;
2832 struct nx_flow_req *nfr = &flow->nfr;
2833
2834 int error = __os_nexus_flow_del(nexus->controller, nexus->fsw_nx_uuid, nfr);
2835 SKTC_ASSERT_ERR(!error);
2836 if (error) {
2837 T_LOG("failed to deling flow %s", sktu_nfr_to_string(nfr));
2838 }
2839
2840 free(flow);
2841 }
2842
/*
 * Look up the sk_stats_flow record for `flow_uuid` via the
 * SK_STATS_FLOW sysctl.  Copies the record into *sf and returns 0,
 * or ENOENT if no entry matches.
 */
int
sktu_get_nexus_flow_stats(uuid_t flow_uuid, struct sk_stats_flow *sf)
{
	size_t length = 0;
	void *buffer = NULL;
	int result = ENOENT;

	int ret = sysctl_buf(SK_STATS_FLOW, &buffer, &length, NULL, 0);
	assert(ret == 0);
	assert(buffer != NULL && length != 0);

	/* the sysctl returns a flat array of sk_stats_flow records */
	assert((length % sizeof(*sf)) == 0);

	struct sk_stats_flow *iter;
	for (iter = buffer; (void *)iter < buffer + length; iter++) {
		if (uuid_compare(iter->sf_uuid, flow_uuid) == 0) {
			*sf = *iter;
			result = 0;
			break;
		}
	}
	/*
	 * BUG FIX: the buffer was leaked on both return paths.
	 * (Assumes sysctl_buf heap-allocates into *buffer, matching its
	 * out-pointer signature -- TODO confirm against its definition.)
	 */
	free(buffer);
	return result;
}
2863
2864 int
sktu_get_nexus_flowswitch_stats(struct sk_stats_flow_switch ** sfsw,size_t * len)2865 sktu_get_nexus_flowswitch_stats(struct sk_stats_flow_switch **sfsw, size_t *len)
2866 {
2867 int ret;
2868 void *buffer = NULL;
2869 size_t length = 0;
2870 size_t width = sizeof(struct sk_stats_flow_switch);
2871
2872 ret = sysctl_buf(SK_STATS_FLOW_SWITCH, &buffer, &length, NULL, 0);
2873 if (ret != 0 || buffer == NULL || length == 0) {
2874 return ret;
2875 }
2876 if ((length % width) != 0) {
2877 T_LOG("Error, mismatching sk_stats_flow_switch, quit\n");
2878 exit(EX_OSERR);
2879 }
2880
2881 *sfsw = (struct sk_stats_flow_switch *)buffer;
2882 *len = length;
2883
2884 return 0;
2885 }
2886
2887 void
__fsw_stats_print(struct fsw_stats * s)2888 __fsw_stats_print(struct fsw_stats *s)
2889 {
2890 int i;
2891
2892 for (i = 0; i < __FSW_STATS_MAX; i++) {
2893 if (STATS_VAL(s, i) == 0) {
2894 continue;
2895 }
2896 os_log(OS_LOG_DEFAULT, "\t%-24s: %llu\n",
2897 fsw_stats_str(i), STATS_VAL(s, i));
2898 }
2899 }
2900