1 /*
2 * Copyright (c) 2017-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
#if (DEVELOPMENT || DEBUG)
/* Debug/development-only sysctl node: kern.skywalk.packet */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
/* kern.skywalk.packet.trailers: nonzero enables trailing-bytes testing */
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
40
41
__attribute__((always_inline))
static inline void
_pkt_copy(void *src, void *dst, size_t len)
{
	/*
	 * Copy "len" bytes from "src" to "dst".  When both pointers are
	 * 8-byte aligned, dispatch to the specialized sk_copy64_* routines
	 * for common protocol-header sizes and for lengths that are
	 * multiples of 64/32/8/4 bytes; otherwise fall back to bcopy().
	 */
	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
		switch (len) {
		case 20:                /* standard IPv4 header */
			sk_copy64_20(src, dst);
			return;

		case 40:                /* IPv6 header */
			sk_copy64_40(src, dst);
			return;

		default:
			/* IS_P2ALIGNED(len, n) here tests len % n == 0 */
			if (IS_P2ALIGNED(len, 64)) {
				sk_copy64_64x(src, dst, len);
				return;
			} else if (IS_P2ALIGNED(len, 32)) {
				sk_copy64_32x(src, dst, len);
				return;
			} else if (IS_P2ALIGNED(len, 8)) {
				sk_copy64_8x(src, dst, len);
				return;
			} else if (IS_P2ALIGNED(len, 4)) {
				sk_copy64_4x(src, dst, len);
				return;
			}
			break;
		}
	}
	bcopy(src, dst, len);
}
75
76 /*
77 * This routine is used for copying data across two kernel packets.
78 * Can also optionally compute 16-bit partial inet checksum as the
79 * data is copied.
80 * This routine is used by flowswitch while copying packet from vp
81 * adapter pool to packet in native netif pool and vice-a-versa.
82 *
83 * start/stuff is relative to soff, within [0, len], such that
84 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85 */
86 void
pkt_copy_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)87 pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
88 kern_packet_t sph, const uint16_t soff, const uint32_t len,
89 const boolean_t copysum, const uint16_t start, const uint16_t stuff,
90 const boolean_t invert)
91 {
92 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
93 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
94 uint32_t partial;
95 uint16_t csum = 0;
96 uint8_t *sbaddr, *dbaddr;
97 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
98
99 _CASSERT(sizeof(csum) == sizeof(uint16_t));
100
101 /* get buffer address from packet */
102 MD_BUFLET_ADDR_ABS(spkt, sbaddr);
103 ASSERT(sbaddr != NULL);
104 sbaddr += soff;
105 MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
106 ASSERT(dbaddr != NULL);
107 dbaddr += doff;
108 VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
109
110 switch (t) {
111 case NR_RX:
112 dpkt->pkt_csum_flags = 0;
113 if (__probable(do_sum)) {
114 /*
115 * Use pkt_copy() to copy the portion up to the
116 * point where we need to start the checksum, and
117 * copy the remainder, checksumming as we go.
118 */
119 if (__probable(start != 0)) {
120 _pkt_copy(sbaddr, dbaddr, start);
121 }
122 partial = __packet_copy_and_sum((sbaddr + start),
123 (dbaddr + start), (len - start), 0);
124 csum = __packet_fold_sum(partial);
125
126 __packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
127 start, csum, FALSE);
128 } else {
129 _pkt_copy(sbaddr, dbaddr, len);
130 dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
131 dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
132 dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
133 }
134
135 SK_DF(SK_VERB_COPY | SK_VERB_RX,
136 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
137 sk_proc_name_address(current_proc()),
138 sk_proc_pid(current_proc()), len,
139 (copysum ? (len - start) : 0), csum, start);
140 SK_DF(SK_VERB_COPY | SK_VERB_RX,
141 " pkt 0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
142 SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
143 (uint32_t)dpkt->pkt_csum_rx_start_off,
144 (uint32_t)dpkt->pkt_csum_rx_value);
145 break;
146
147 case NR_TX:
148 if (__probable(copysum)) {
149 /*
150 * Use pkt_copy() to copy the portion up to the
151 * point where we need to start the checksum, and
152 * copy the remainder, checksumming as we go.
153 */
154 if (__probable(start != 0)) {
155 _pkt_copy(sbaddr, dbaddr, start);
156 }
157 partial = __packet_copy_and_sum((sbaddr + start),
158 (dbaddr + start), (len - start), 0);
159 csum = __packet_fold_sum_final(partial);
160
161 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
162 if (csum == 0 && invert) {
163 csum = 0xffff;
164 }
165
166 /* Insert checksum into packet */
167 ASSERT(stuff <= (len - sizeof(csum)));
168 if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
169 *(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
170 } else {
171 bcopy((void *)&csum, dbaddr + stuff,
172 sizeof(csum));
173 }
174 } else {
175 _pkt_copy(sbaddr, dbaddr, len);
176 }
177 dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
178 dpkt->pkt_csum_tx_start_off = 0;
179 dpkt->pkt_csum_tx_stuff_off = 0;
180
181 SK_DF(SK_VERB_COPY | SK_VERB_TX,
182 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
183 sk_proc_name_address(current_proc()),
184 sk_proc_pid(current_proc()), len,
185 (copysum ? (len - start) : 0), csum, start);
186 break;
187
188 default:
189 VERIFY(0);
190 /* NOTREACHED */
191 __builtin_unreachable();
192 }
193 METADATA_ADJUST_LEN(dpkt, len, doff);
194
195 SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
196 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
197 (t == NR_RX) ? "RX" : "TX",
198 sk_dump("buf", dbaddr, len, 128, NULL, 0));
199 }
200
201 /*
202 * NOTE: soff is the offset within the packet
203 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
204 * caller is responsible for further reducing it to 16-bit if needed,
205 * as well as to perform the final 1's complement on it.
206 */
207 uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)208 _pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
209 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
210 {
211 uint8_t odd = 0;
212 uint8_t *sbaddr = NULL;
213 uint32_t sum = initial_sum, partial;
214 uint32_t len0 = len;
215 boolean_t needs_swap, started_on_odd = FALSE;
216 uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
217 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
218 kern_buflet_t sbuf = NULL, sbufp = NULL;
219
220 sbcnt = __packet_get_buflet_count(sph);
221
222 if (odd_start) {
223 started_on_odd = *odd_start;
224 }
225
226 /* fastpath (copy+sum, single buflet, even aligned, even length) */
227 if (do_csum && sbcnt == 1 && len != 0) {
228 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
229 ASSERT(sbuf != NULL);
230 sboff = __buflet_get_data_offset(sbuf);
231 sblen = __buflet_get_data_length(sbuf);
232 ASSERT(sboff <= soff);
233 ASSERT(soff < sboff + sblen);
234 sblen -= (soff - sboff);
235 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
236
237 clen = (uint16_t)MIN(len, sblen);
238
239 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
240 sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
241 return __packet_fold_sum(sum);
242 }
243
244 sbaddr = NULL;
245 sbuf = sbufp = NULL;
246 }
247
248 while (len != 0) {
249 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
250 if (__improbable(sbuf == NULL)) {
251 panic("%s: bad packet, 0x%llx [off %d, len %d]",
252 __func__, SK_KVA(spkt), off0, len0);
253 /* NOTREACHED */
254 __builtin_unreachable();
255 }
256 sbufp = sbuf;
257 sboff = __buflet_get_data_offset(sbuf);
258 sblen = __buflet_get_data_length(sbuf);
259 ASSERT((sboff <= soff) && (soff < sboff + sblen));
260 sblen -= (soff - sboff);
261 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
262 soff = 0;
263 clen = (uint16_t)MIN(len, sblen);
264 if (__probable(do_csum)) {
265 partial = 0;
266 if (__improbable((uintptr_t)sbaddr & 1)) {
267 /* Align on word boundary */
268 started_on_odd = !started_on_odd;
269 #if BYTE_ORDER == LITTLE_ENDIAN
270 partial = (uint8_t)*sbaddr << 8;
271 #else /* BYTE_ORDER != LITTLE_ENDIAN */
272 partial = (uint8_t)*sbaddr;
273 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
274 *dbaddr++ = *sbaddr++;
275 sblen -= 1;
276 clen -= 1;
277 len -= 1;
278 }
279 needs_swap = started_on_odd;
280
281 odd = clen & 1u;
282 clen -= odd;
283
284 if (clen != 0) {
285 partial = __packet_copy_and_sum(sbaddr, dbaddr,
286 clen, partial);
287 }
288
289 if (__improbable(partial & 0xc0000000)) {
290 if (needs_swap) {
291 partial = (partial << 8) +
292 (partial >> 24);
293 }
294 sum += (partial >> 16);
295 sum += (partial & 0xffff);
296 partial = 0;
297 }
298 } else {
299 _pkt_copy(sbaddr, dbaddr, clen);
300 }
301
302 dbaddr += clen;
303 sbaddr += clen;
304
305 if (__probable(do_csum)) {
306 if (odd != 0) {
307 #if BYTE_ORDER == LITTLE_ENDIAN
308 partial += (uint8_t)*sbaddr;
309 #else /* BYTE_ORDER != LITTLE_ENDIAN */
310 partial += (uint8_t)*sbaddr << 8;
311 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
312 *dbaddr++ = *sbaddr++;
313 started_on_odd = !started_on_odd;
314 }
315
316 if (needs_swap) {
317 partial = (partial << 8) + (partial >> 24);
318 }
319 sum += (partial >> 16) + (partial & 0xffff);
320 /*
321 * Reduce sum to allow potential byte swap
322 * in the next iteration without carry.
323 */
324 sum = (sum >> 16) + (sum & 0xffff);
325 }
326
327 sblen -= clen + odd;
328 len -= clen + odd;
329 ASSERT(sblen == 0 || len == 0);
330 }
331
332 if (odd_start) {
333 *odd_start = started_on_odd;
334 }
335
336 if (__probable(do_csum)) {
337 /* Final fold (reduce 32-bit to 16-bit) */
338 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
339 sum = (sum >> 16) + (sum & 0xffff);
340 }
341 return sum;
342 }
343
344 /*
345 * NOTE: Caller of this function is responsible to adjust the length and offset
346 * of the first buflet of the destination packet if (doff != 0),
347 * i.e. additional data is being prependend to the packet.
348 * It should also finalize the packet.
349 * To simplify & optimize the routine, we have also assumed that soff & doff
350 * will lie within the first buffer, which is true for the current use cases
351 * where, doff is the offset of the checksum field in the TCP/IP header and
352 * soff is the L3 offset.
353 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
354 * caller is responsible for further reducing it to 16-bit if needed,
355 * as well as to perform the final 1's complement on it.
356 */
357 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)358 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
359 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
360 {
361 uint8_t odd = 0;
362 uint32_t sum = 0, partial;
363 boolean_t needs_swap, started_on_odd = FALSE;
364 uint8_t *sbaddr = NULL, *dbaddr = NULL;
365 uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
366 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
367 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
368 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
369
370 ASSERT(csum_partial != NULL || !do_csum);
371 sbcnt = __packet_get_buflet_count(sph);
372 dbcnt = __packet_get_buflet_count(dph);
373
374 while (len != 0) {
375 ASSERT(sbaddr == NULL || dbaddr == NULL);
376 if (sbaddr == NULL) {
377 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
378 if (__improbable(sbuf == NULL)) {
379 break;
380 }
381 sbufp = sbuf;
382 sblen = __buflet_get_data_length(sbuf);
383 sboff = __buflet_get_data_offset(sbuf);
384 ASSERT(soff >= sboff);
385 ASSERT(sboff + sblen > soff);
386 sblen -= (soff - sboff);
387 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
388 soff = 0;
389 }
390
391 if (dbaddr == NULL) {
392 if (dbufp != NULL) {
393 __buflet_set_data_length(dbufp, dlen0);
394 }
395
396 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
397 if (__improbable(dbuf == NULL)) {
398 break;
399 }
400 dbufp = dbuf;
401 dlim = __buflet_get_data_limit(dbuf);
402 ASSERT(dlim > doff);
403 dlim -= doff;
404 if (doff != 0) {
405 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
406 }
407 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
408 dlen0 = dlim;
409 doff = 0;
410 }
411
412 clen = (uint16_t)MIN(len, sblen);
413 clen = MIN(clen, dlim);
414
415 if (__probable(do_csum)) {
416 partial = 0;
417 if (__improbable((uintptr_t)sbaddr & 1)) {
418 /* Align on word boundary */
419 started_on_odd = !started_on_odd;
420 #if BYTE_ORDER == LITTLE_ENDIAN
421 partial = (uint8_t)*sbaddr << 8;
422 #else /* BYTE_ORDER != LITTLE_ENDIAN */
423 partial = (uint8_t)*sbaddr;
424 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
425 *dbaddr++ = *sbaddr++;
426 clen -= 1;
427 dlim -= 1;
428 len -= 1;
429 }
430 needs_swap = started_on_odd;
431
432 odd = clen & 1u;
433 clen -= odd;
434
435 if (clen != 0) {
436 partial = __packet_copy_and_sum(sbaddr, dbaddr,
437 clen, partial);
438 }
439
440 if (__improbable(partial & 0xc0000000)) {
441 if (needs_swap) {
442 partial = (partial << 8) +
443 (partial >> 24);
444 }
445 sum += (partial >> 16);
446 sum += (partial & 0xffff);
447 partial = 0;
448 }
449 } else {
450 _pkt_copy(sbaddr, dbaddr, clen);
451 }
452 sbaddr += clen;
453 dbaddr += clen;
454
455 if (__probable(do_csum)) {
456 if (odd != 0) {
457 #if BYTE_ORDER == LITTLE_ENDIAN
458 partial += (uint8_t)*sbaddr;
459 #else /* BYTE_ORDER != LITTLE_ENDIAN */
460 partial += (uint8_t)*sbaddr << 8;
461 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
462 *dbaddr++ = *sbaddr++;
463 started_on_odd = !started_on_odd;
464 }
465
466 if (needs_swap) {
467 partial = (partial << 8) + (partial >> 24);
468 }
469 sum += (partial >> 16) + (partial & 0xffff);
470 /*
471 * Reduce sum to allow potential byte swap
472 * in the next iteration without carry.
473 */
474 sum = (sum >> 16) + (sum & 0xffff);
475 }
476
477 sblen -= clen + odd;
478 dlim -= clen + odd;
479 len -= clen + odd;
480
481 if (sblen == 0) {
482 sbaddr = NULL;
483 }
484
485 if (dlim == 0) {
486 dbaddr = NULL;
487 }
488 }
489
490 if (__probable(dbuf != NULL)) {
491 __buflet_set_data_length(dbuf, (dlen0 - dlim));
492 }
493 if (__probable(do_csum)) {
494 /* Final fold (reduce 32-bit to 16-bit) */
495 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
496 sum = (sum >> 16) + (sum & 0xffff);
497 *csum_partial = (uint32_t)sum;
498 }
499 return len == 0;
500 }
501
502 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)503 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
504 {
505 uint8_t odd = 0;
506 uint32_t sum = 0, partial;
507 boolean_t needs_swap, started_on_odd = FALSE;
508 uint8_t *sbaddr = NULL;
509 uint16_t clen, sblen, sbcnt, sboff;
510 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
511 kern_buflet_t sbuf = NULL, sbufp = NULL;
512
513 sbcnt = __packet_get_buflet_count(sph);
514
515 /* fastpath (single buflet, even aligned, even length) */
516 if (sbcnt == 1 && len != 0) {
517 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
518 ASSERT(sbuf != NULL);
519 sblen = __buflet_get_data_length(sbuf);
520 sboff = __buflet_get_data_offset(sbuf);
521 ASSERT(soff >= sboff);
522 ASSERT(sboff + sblen > soff);
523 sblen -= (soff - sboff);
524 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
525
526 clen = MIN(len, sblen);
527
528 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
529 sum = __packet_cksum(sbaddr, clen, 0);
530 return __packet_fold_sum(sum);
531 }
532
533 sbaddr = NULL;
534 sbuf = sbufp = NULL;
535 }
536
537 /* slowpath */
538 while (len != 0) {
539 ASSERT(sbaddr == NULL);
540 if (sbaddr == NULL) {
541 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
542 if (__improbable(sbuf == NULL)) {
543 break;
544 }
545 sbufp = sbuf;
546 sblen = __buflet_get_data_length(sbuf);
547 sboff = __buflet_get_data_offset(sbuf);
548 ASSERT(soff >= sboff);
549 ASSERT(sboff + sblen > soff);
550 sblen -= (soff - sboff);
551 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
552 soff = 0;
553 }
554
555 clen = MIN(len, sblen);
556
557 partial = 0;
558 if (__improbable((uintptr_t)sbaddr & 1)) {
559 /* Align on word boundary */
560 started_on_odd = !started_on_odd;
561 #if BYTE_ORDER == LITTLE_ENDIAN
562 partial = (uint8_t)*sbaddr << 8;
563 #else /* BYTE_ORDER != LITTLE_ENDIAN */
564 partial = (uint8_t)*sbaddr;
565 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
566 clen -= 1;
567 len -= 1;
568 }
569 needs_swap = started_on_odd;
570
571 odd = clen & 1u;
572 clen -= odd;
573
574 if (clen != 0) {
575 partial = __packet_cksum(sbaddr,
576 clen, partial);
577 }
578
579 if (__improbable(partial & 0xc0000000)) {
580 if (needs_swap) {
581 partial = (partial << 8) +
582 (partial >> 24);
583 }
584 sum += (partial >> 16);
585 sum += (partial & 0xffff);
586 partial = 0;
587 }
588 sbaddr += clen;
589
590 if (odd != 0) {
591 #if BYTE_ORDER == LITTLE_ENDIAN
592 partial += (uint8_t)*sbaddr;
593 #else /* BYTE_ORDER != LITTLE_ENDIAN */
594 partial += (uint8_t)*sbaddr << 8;
595 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
596 started_on_odd = !started_on_odd;
597 }
598
599 if (needs_swap) {
600 partial = (partial << 8) + (partial >> 24);
601 }
602 sum += (partial >> 16) + (partial & 0xffff);
603 /*
604 * Reduce sum to allow potential byte swap
605 * in the next iteration without carry.
606 */
607 sum = (sum >> 16) + (sum & 0xffff);
608
609 sblen -= clen + odd;
610 len -= clen + odd;
611
612 if (sblen == 0) {
613 sbaddr = NULL;
614 }
615 }
616
617 /* Final fold (reduce 32-bit to 16-bit) */
618 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
619 sum = (sum >> 16) + (sum & 0xffff);
620 return (uint32_t)sum;
621 }
622
623
624 /*
625 * This is a multi-buflet variant of pkt_copy_from_pkt().
626 *
627 * start/stuff is relative to soff, within [0, len], such that
628 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
629 */
630 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)631 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
632 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
633 const uint32_t len, const boolean_t copysum, const uint16_t start,
634 const uint16_t stuff, const boolean_t invert)
635 {
636 boolean_t rc;
637 uint32_t partial;
638 uint16_t csum = 0;
639 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
640 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
641 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
642
643 VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
644 __packet_get_buflet_count(dph)));
645
646 switch (t) {
647 case NR_RX:
648 dpkt->pkt_csum_flags = 0;
649 if (__probable(do_sum)) {
650 /*
651 * copy the portion up to the point where we need to
652 * start the checksum, and copy the remainder,
653 * checksumming as we go.
654 */
655 if (__probable(start != 0)) {
656 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
657 start, NULL, FALSE);
658 ASSERT(rc);
659 }
660 _pkt_copypkt_sum(sph, (soff + start), dph,
661 (doff + start), (len - start), &partial, TRUE);
662 csum = __packet_fold_sum(partial);
663 __packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
664 start, csum, FALSE);
665 METADATA_ADJUST_LEN(dpkt, start, doff);
666 } else {
667 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
668 FALSE);
669 ASSERT(rc);
670 dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
671 dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
672 dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
673 }
674 break;
675
676 case NR_TX:
677 if (__probable(copysum)) {
678 uint8_t *baddr;
679 /*
680 * copy the portion up to the point where we need to
681 * start the checksum, and copy the remainder,
682 * checksumming as we go.
683 */
684 if (__probable(start != 0)) {
685 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
686 start, NULL, FALSE);
687 ASSERT(rc);
688 }
689 rc = _pkt_copypkt_sum(sph, (soff + start), dph,
690 (doff + start), (len - start), &partial, TRUE);
691 ASSERT(rc);
692 csum = __packet_fold_sum_final(partial);
693
694 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
695 if (csum == 0 && invert) {
696 csum = 0xffff;
697 }
698
699 /*
700 * Insert checksum into packet.
701 * Here we assume that checksum will be in the
702 * first buffer.
703 */
704 ASSERT((stuff + doff + sizeof(csum)) <=
705 PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
706 ASSERT(stuff <= (len - sizeof(csum)));
707
708 /* get first buflet buffer address from packet */
709 MD_BUFLET_ADDR_ABS(dpkt, baddr);
710 ASSERT(baddr != NULL);
711 baddr += doff;
712 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
713 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
714 } else {
715 bcopy((void *)&csum, baddr + stuff,
716 sizeof(csum));
717 }
718 METADATA_ADJUST_LEN(dpkt, start, doff);
719 } else {
720 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
721 FALSE);
722 ASSERT(rc);
723 }
724 dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
725 dpkt->pkt_csum_tx_start_off = 0;
726 dpkt->pkt_csum_tx_stuff_off = 0;
727 break;
728
729 default:
730 VERIFY(0);
731 /* NOTREACHED */
732 __builtin_unreachable();
733 }
734 }
735
736 /*
737 * This routine is used for copying an mbuf which originated in the host
738 * stack destined to a native skywalk interface (NR_TX), as well as for
739 * mbufs originating on compat network interfaces (NR_RX).
740 *
741 * start/stuff is relative to moff, within [0, len], such that
742 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
743 */
744 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)745 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
746 struct mbuf *m, const uint16_t moff, const uint32_t len,
747 const boolean_t copysum, const uint16_t start)
748 {
749 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
750 uint32_t partial;
751 uint16_t csum = 0;
752 uint8_t *baddr;
753
754 _CASSERT(sizeof(csum) == sizeof(uint16_t));
755
756 /* get buffer address from packet */
757 MD_BUFLET_ADDR_ABS(pkt, baddr);
758 ASSERT(baddr != NULL);
759 baddr += poff;
760 VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
761
762 switch (t) {
763 case NR_RX:
764 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
765 pkt->pkt_csum_rx_start_off = 0;
766 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
767 pkt->pkt_svc_class = m_get_service_class(m);
768 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
769 != CSUM_RX_FULL_FLAGS) && copysum)) {
770 /*
771 * Use m_copydata() to copy the portion up to the
772 * point where we need to start the checksum, and
773 * copy the remainder, checksumming as we go.
774 */
775 if (start != 0) {
776 m_copydata(m, moff, start, baddr);
777 }
778 partial = m_copydata_sum(m, start, (len - start),
779 (baddr + start), 0, NULL);
780 csum = __packet_fold_sum(partial);
781
782 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
783 start, csum, FALSE);
784 } else {
785 m_copydata(m, moff, len, baddr);
786 }
787 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
788 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
789 sk_proc_name_address(current_proc()),
790 sk_proc_pid(current_proc()), len,
791 (copysum ? (len - start) : 0), csum, start);
792 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
793 " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
794 SK_KVA(m), m->m_pkthdr.csum_flags,
795 (uint32_t)m->m_pkthdr.csum_rx_start,
796 (uint32_t)m->m_pkthdr.csum_rx_val);
797 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
798 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
799 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
800 (uint32_t)pkt->pkt_csum_rx_start_off,
801 (uint32_t)pkt->pkt_csum_rx_value);
802 break;
803
804 case NR_TX:
805 if (__probable(copysum)) {
806 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
807 /*
808 * Use m_copydata() to copy the portion up to the
809 * point where we need to start the checksum, and
810 * copy the remainder, checksumming as we go.
811 */
812 if (start != 0) {
813 m_copydata(m, moff, start, baddr);
814 }
815 partial = m_copydata_sum(m, start, (len - start),
816 (baddr + start), 0, NULL);
817 csum = __packet_fold_sum_final(partial);
818
819 /*
820 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
821 * ideally we'd only test for CSUM_ZERO_INVERT
822 * here, but catch cases where the originator
823 * did not set it for UDP.
824 */
825 if (csum == 0 && (m->m_pkthdr.csum_flags &
826 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
827 csum = 0xffff;
828 }
829
830 /* Insert checksum into packet */
831 ASSERT(stuff <= (len - sizeof(csum)));
832 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
833 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
834 } else {
835 bcopy((void *)&csum, baddr + stuff,
836 sizeof(csum));
837 }
838 } else {
839 m_copydata(m, moff, len, baddr);
840 }
841 pkt->pkt_csum_flags = 0;
842 pkt->pkt_csum_tx_start_off = 0;
843 pkt->pkt_csum_tx_stuff_off = 0;
844
845 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
846 pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
847 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
848 }
849 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
850 pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
851 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
852 }
853
854 /* translate mbuf metadata */
855 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
856 pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
857 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
858 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
859 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
860 switch (m->m_pkthdr.pkt_proto) {
861 case IPPROTO_QUIC:
862 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
863 pkt->pkt_transport_protocol = IPPROTO_QUIC;
864 break;
865
866 default:
867 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
868 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
869 break;
870 }
871 (void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
872 pkt->pkt_svc_class = m_get_service_class(m);
873 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
874 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
875 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
876 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
877 }
878 necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
879 pkt->pkt_policy_id =
880 (uint32_t)necp_get_policy_id_from_packet(m);
881
882 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
883 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
884 __packet_set_tx_completion_data(ph,
885 m->m_pkthdr.drv_tx_compl_arg,
886 m->m_pkthdr.drv_tx_compl_data);
887 }
888 pkt->pkt_tx_compl_context =
889 m->m_pkthdr.pkt_compl_context;
890 pkt->pkt_tx_compl_callbacks =
891 m->m_pkthdr.pkt_compl_callbacks;
892 /*
893 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
894 * mbuf can no longer trigger a completion callback.
895 * callback will be invoked when the kernel packet is
896 * completed.
897 */
898 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
899
900 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
901 }
902
903 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
904 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
905 sk_proc_name_address(current_proc()),
906 sk_proc_pid(current_proc()), len,
907 (copysum ? (len - start) : 0), csum, start);
908 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
909 " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
910 SK_KVA(m), m->m_pkthdr.csum_flags,
911 (uint32_t)m->m_pkthdr.csum_tx_start,
912 (uint32_t)m->m_pkthdr.csum_tx_stuff);
913 break;
914
915 default:
916 VERIFY(0);
917 /* NOTREACHED */
918 __builtin_unreachable();
919 }
920 METADATA_ADJUST_LEN(pkt, len, poff);
921
922 if (m->m_flags & M_BCAST) {
923 __packet_set_link_broadcast(ph);
924 } else if (m->m_flags & M_MCAST) {
925 __packet_set_link_multicast(ph);
926 }
927
928 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
929 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
930 (t == NR_RX) ? "RX" : "TX",
931 sk_dump("buf", baddr, len, 128, NULL, 0));
932 }
933
934 /*
935 * Like m_copydata_sum(), but works on a destination kernel packet.
936 */
937 static inline uint32_t
m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
    uint32_t len, boolean_t do_cscum)
{
	/*
	 * Copy "len" bytes starting at offset "soff" of mbuf chain "m"
	 * into the (possibly multi-buflet) kernel packet "dph" starting
	 * at offset "doff", walking the source mbuf chain and the
	 * destination buflet chain in lock-step.  When do_cscum is set,
	 * a 16-bit one's complement partial sum is accumulated over the
	 * copied bytes and returned (reduced to 16 bits plus carry);
	 * otherwise 0 is returned.  Panics if the mbuf chain is shorter
	 * than soff+len or the destination runs out of buflets.
	 */
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = soff;		/* original offset, for diagnostics */
	uint32_t len0 = len;		/* original length, for diagnostics */
	struct mbuf *m0 = m;		/* chain head, for diagnostics */
	uint32_t sum = 0, partial;	/* checksum accumulators */
	unsigned count0, count, odd, mlen_copied;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;	/* src/dst copy cursors */
	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t dbuf = NULL, dbufp = NULL;

	/* advance to the mbuf containing offset "soff" */
	while (soff > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (soff < m->m_len) {
			break;
		}
		soff -= m->m_len;
		m = m->m_next;
	}

	if (__improbable(m == NULL)) {
		panic("%s: invalid mbuf chain %p [off %d, len %d]",
		    __func__, m0, off0, len0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	sbaddr = mtod(m, uint8_t *) + soff;
	count = m->m_len - soff;	/* bytes left in the current mbuf */
	mlen_copied = 0;

	while (len != 0) {
		/* at most one of the two cursors needs refilling here */
		ASSERT(sbaddr == NULL || dbaddr == NULL);

		/* source mbuf exhausted: move to the next one */
		if (sbaddr == NULL) {
			soff = 0;
			m = m->m_next;
			if (__improbable(m == NULL)) {
				panic("%s: invalid mbuf chain %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			sbaddr = mtod(m, uint8_t *);
			count = m->m_len;
			mlen_copied = 0;
		}

		/* skip zero-length mbufs */
		if (__improbable(count == 0)) {
			sbaddr = NULL;
			continue;
		}

		/* destination buflet exhausted: move to the next one */
		if (dbaddr == NULL) {
			if (dbufp != NULL) {
				/* finalize length of the buflet just filled */
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				panic("%s: mbuf too large %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			dbufp = dbuf;
			/* "doff" applies to the first buflet only */
			dlim = __buflet_get_data_limit(dbuf) - doff;
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;
			doff = 0;
		}

		/* copy no more than what's left and what fits */
		count = MIN(count, (unsigned)len);
		count0 = count = MIN(count, dlim);

		if (!do_cscum) {
			_pkt_copy(sbaddr, dbaddr, count);
			sbaddr += count;
			dbaddr += count;
			goto skip_csum;
		}

		partial = 0;
		if ((uintptr_t)sbaddr & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			count -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			partial = __packet_copy_and_sum(sbaddr,
			    dbaddr, count, partial);
			sbaddr += count;
			dbaddr += count;
			/* fold early if the accumulator could overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					/* 32-bit byte rotate for odd start */
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}

		if (odd) {
			/* account for the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* compensate for data that started on an odd byte */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

skip_csum:
		dlim -= count0;
		len -= count0;
		mlen_copied += count0;

		if (dlim == 0) {
			dbaddr = NULL;
		}

		count = m->m_len - soff - mlen_copied;
		if (count == 0) {
			sbaddr = NULL;
		}
	}

	ASSERT(len == 0);
	ASSERT(dbuf != NULL);
	/* set the final (possibly partially filled) buflet's length */
	__buflet_set_data_length(dbuf, (dlen0 - dlim));

	if (!do_cscum) {
		return 0;
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
1108
1109 /*
1110 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1111 *
1112 * start/stuff is relative to moff, within [0, len], such that
1113 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1114 */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* 32-bit partial one's complement sum */
	uint16_t csum = 0;	/* folded 16-bit checksum */
	uint8_t *baddr;		/* first buflet's data address (+poff) */

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* len must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		/* inherit the mbuf's RX checksum state and service class */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/*
		 * Only compute the sum ourselves when requested and the
		 * mbuf does not already carry the full set of RX
		 * checksum-validated flags.
		 */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * NOTE(review): presumably accounts for the first
			 * "start" bytes copied via m_copydata() above,
			 * which bypassed m_copypkt_sum()'s length updates
			 * — confirm against the macro definition.
			 */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/* offset of the checksum field within the data */
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned store via bcopy */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum already inserted (or not needed) above */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO flags (mutually exclusive by family) */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type to the packet */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1305
1306 /*
1307 * This routine is used for copying from a packet originating from a native
1308 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1309 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1310 *
1311 * Note that this routine does not alter m_data pointer of the mbuf, as the
1312 * caller may want to use the original value upon return. We do, however,
1313 * adjust the length to reflect the total data span.
1314 *
1315 * This routine supports copying into an mbuf chain for RX but not TX.
1316 *
1317 * start/stuff is relative to poff, within [0, len], such that
1318 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1319 */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;		/* cursor over the RX mbuf chain */
	uint32_t partial = 0;		/* running partial checksum */
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;		/* folded 16-bit checksum */
	uint8_t *baddr;			/* packet buffer address (+poff) */
	uint8_t *dp;			/* destination pointer in mbuf */
	/* only checksum here if the packet isn't already fully validated */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	ASSERT((m->m_flags & M_PKTHDR));
	/*
	 * NOTE(review): m_data is advanced by moff here and not restored
	 * before return, which appears to contradict the header comment
	 * above ("does not alter m_data") — confirm with callers.
	 */
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/*
		 * Use pkt_copy() to copy the portion up to the
		 * point where we need to start the checksum, and
		 * copy the remainder, checksumming as we go.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* copy (and optionally sum) the rest across the chain */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
			if (__probable(do_sum)) {
				partial = __packet_copy_and_sum((baddr + copied_len),
				    dp, tmp_len, partial);
			} else {
				_pkt_copy((baddr + copied_len), dp, tmp_len);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* the chain must have had room for the entire payload */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* record the software-computed partial checksum */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* propagate the packet's existing checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		/* TX copies into a single mbuf only (no chain support) */
		dp = (uint8_t *)m->m_data;
		ASSERT(m->m_next == NULL);

		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

		if (__probable(copysum)) {
			/* offset of the checksum field within the data */
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned store via bcopy */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		/* checksum already inserted (or not needed) above */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type to the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1512
1513 /*
1514 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1515 * NOTE: poff is the offset within the packet.
1516 *
1517 * This routine supports copying into an mbuf chain for RX but not TX.
1518 *
1519 * start/stuff is relative to poff, within [0, len], such that
1520 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1521 */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;		/* cursor over the RX mbuf chain */
	uint32_t partial = 0;		/* running partial checksum */
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;		/* folded 16-bit checksum */
	uint8_t *baddr;			/* first buflet's address (+poff) */
	uint8_t *dp;			/* destination pointer in mbuf */
	/* only checksum here if the packet isn't already fully validated */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* len must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	ASSERT((m->m_flags & M_PKTHDR));
	/*
	 * NOTE(review): m_data is advanced by moff here and not restored
	 * before return — same caveat as pkt_copy_to_mbuf(); confirm
	 * with callers.
	 */
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		/*
		 * Copy the pre-checksum prefix straight from the first
		 * buflet (MD_BUFLET_ADDR_ABS above), then checksum the
		 * remainder as it is copied below.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* walk the chain, pulling bytes from successive buflets */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			/* source offset within the packet's buflets */
			uint16_t soff = poff + (uint16_t)copied_len;
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;

			if (__probable(do_sum)) {
				partial = _pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, TRUE, partial, NULL);
			} else {
				pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, FALSE, 0, NULL);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* the chain must have had room for the entire payload */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* record the software-computed partial checksum */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* propagate the packet's existing checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;
	case NR_TX:
		/* TX copies into a single mbuf only (no chain support) */
		dp = (uint8_t *)m->m_data;
		ASSERT(m->m_next == NULL);
		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
		if (__probable(copysum)) {
			/* offset of the checksum field within the data */
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned store via bcopy */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum already inserted (or not needed) above */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type to the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1709
1710 /*
1711 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1712 * Caller can provide an initial sum to be folded into the computed
1713 * sum. The accumulated partial sum (32-bit) is returned to caller;
1714 * caller is responsible for further reducing it to 16-bit if needed,
1715 * as well as to perform the final 1's complement on it.
1716 */
1717 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1718 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1719 boolean_t *odd_start)
1720 {
1721 boolean_t needs_swap, started_on_odd = FALSE;
1722 int off0 = off, len0 = len;
1723 struct mbuf *m0 = m;
1724 uint64_t sum, partial;
1725 unsigned count, odd;
1726 char *cp = vp;
1727
1728 if (__improbable(off < 0 || len < 0)) {
1729 panic("%s: invalid offset %d or len %d", __func__, off, len);
1730 /* NOTREACHED */
1731 __builtin_unreachable();
1732 }
1733
1734 while (off > 0) {
1735 if (__improbable(m == NULL)) {
1736 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1737 __func__, m0, off0, len0);
1738 /* NOTREACHED */
1739 __builtin_unreachable();
1740 }
1741 if (off < m->m_len) {
1742 break;
1743 }
1744 off -= m->m_len;
1745 m = m->m_next;
1746 }
1747
1748 if (odd_start) {
1749 started_on_odd = *odd_start;
1750 }
1751 sum = initial_sum;
1752
1753 for (; len > 0; m = m->m_next) {
1754 uint8_t *datap;
1755
1756 if (__improbable(m == NULL)) {
1757 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1758 __func__, m0, off0, len0);
1759 /* NOTREACHED */
1760 __builtin_unreachable();
1761 }
1762
1763 datap = mtod(m, uint8_t *) + off;
1764 count = m->m_len;
1765
1766 if (__improbable(count == 0)) {
1767 continue;
1768 }
1769
1770 count = MIN(count - off, (unsigned)len);
1771 partial = 0;
1772
1773 if ((uintptr_t)datap & 1) {
1774 /* Align on word boundary */
1775 started_on_odd = !started_on_odd;
1776 #if BYTE_ORDER == LITTLE_ENDIAN
1777 partial = *datap << 8;
1778 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1779 partial = *datap;
1780 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1781 *cp++ = *datap++;
1782 count -= 1;
1783 len -= 1;
1784 }
1785
1786 needs_swap = started_on_odd;
1787 odd = count & 1u;
1788 count -= odd;
1789
1790 if (count) {
1791 partial = __packet_copy_and_sum(datap,
1792 cp, count, (uint32_t)partial);
1793 datap += count;
1794 cp += count;
1795 len -= count;
1796 if (__improbable((partial & (3ULL << 62)) != 0)) {
1797 if (needs_swap) {
1798 partial = (partial << 8) +
1799 (partial >> 56);
1800 }
1801 sum += (partial >> 32);
1802 sum += (partial & 0xffffffff);
1803 partial = 0;
1804 }
1805 }
1806
1807 if (odd) {
1808 #if BYTE_ORDER == LITTLE_ENDIAN
1809 partial += *datap;
1810 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1811 partial += *datap << 8;
1812 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1813 *cp++ = *datap++;
1814 len -= 1;
1815 started_on_odd = !started_on_odd;
1816 }
1817 off = 0;
1818
1819 if (needs_swap) {
1820 partial = (partial << 8) + (partial >> 24);
1821 }
1822 sum += (partial >> 32) + (partial & 0xffffffff);
1823 /*
1824 * Reduce sum to allow potential byte swap
1825 * in the next iteration without carry.
1826 */
1827 sum = (sum >> 32) + (sum & 0xffffffff);
1828 }
1829
1830 if (odd_start) {
1831 *odd_start = started_on_odd;
1832 }
1833
1834 /* Final fold (reduce 64-bit to 32-bit) */
1835 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1836 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1837
1838 /* return 32-bit partial sum to caller */
1839 return (uint32_t)sum;
1840 }
1841
#if DEBUG || DEVELOPMENT
/*
 * Debug-only state backing pkt_add_trailers()/pkt_add_trailers_mbuf();
 * NOTE(review): shared mutable statics with no locking visible here.
 */
#define TRAILERS_MAX    16              /* max trailing bytes */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1847
1848 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1849 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1850 {
1851 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1852 uint32_t extra;
1853 uint8_t *baddr;
1854
1855 /* get buffer address from packet */
1856 MD_BUFLET_ADDR_ABS(pkt, baddr);
1857 ASSERT(baddr != NULL);
1858 ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1859
1860 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1861 if (extra == 0 || extra > sizeof(tb) ||
1862 (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1863 return 0;
1864 }
1865
1866 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1867 if (regen++ == TRAILERS_REGEN) {
1868 read_frandom(&tb[0], sizeof(tb));
1869 regen = 0;
1870 }
1871
1872 bcopy(&tb[0], (baddr + len), extra);
1873
1874 /* recompute partial sum (also to exercise related logic) */
1875 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1876 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1877 ((len + extra) - start), 0);
1878 pkt->pkt_csum_rx_start_off = start;
1879
1880 return extra;
1881 }
1882
1883 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1884 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1885 {
1886 uint32_t extra;
1887
1888 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1889 if (extra == 0 || extra > sizeof(tb)) {
1890 return 0;
1891 }
1892
1893 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1894 return 0;
1895 }
1896
1897 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1898 if (regen++ == TRAILERS_REGEN) {
1899 read_frandom(&tb[0], sizeof(tb));
1900 regen = 0;
1901 }
1902
1903 /* recompute partial sum (also to exercise related logic) */
1904 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1905 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1906 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1907 m->m_pkthdr.csum_rx_start = start;
1908
1909 return extra;
1910 }
1911 #endif /* DEBUG || DEVELOPMENT */
1912
1913 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)1914 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
1915 uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
1916 {
1917 VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
1918 }
1919
1920 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1921 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1922 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1923 {
1924 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1925 }
1926
1927 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1928 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1929 uint16_t len, boolean_t do_cscum)
1930 {
1931 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1932 }
1933
1934 void
pkt_copy(void * src,void * dst,size_t len)1935 pkt_copy(void *src, void *dst, size_t len)
1936 {
1937 return _pkt_copy(src, dst, len);
1938 }
1939