1 /*
2 * Copyright (c) 2017-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
#if (DEVELOPMENT || DEBUG)
/* Debug-only sysctl node: kern.skywalk.packet.* */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0;   /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* DEVELOPMENT || DEBUG */
40
41
42 __attribute__((always_inline))
43 static inline void
_pkt_copy(void * src,void * dst,size_t len)44 _pkt_copy(void *src, void *dst, size_t len)
45 {
46 if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
47 switch (len) {
48 case 20: /* standard IPv4 header */
49 sk_copy64_20(src, dst);
50 return;
51
52 case 40: /* IPv6 header */
53 sk_copy64_40(src, dst);
54 return;
55
56 default:
57 if (IS_P2ALIGNED(len, 64)) {
58 sk_copy64_64x(src, dst, len);
59 return;
60 } else if (IS_P2ALIGNED(len, 32)) {
61 sk_copy64_32x(src, dst, len);
62 return;
63 } else if (IS_P2ALIGNED(len, 8)) {
64 sk_copy64_8x(src, dst, len);
65 return;
66 } else if (IS_P2ALIGNED(len, 4)) {
67 sk_copy64_4x(src, dst, len);
68 return;
69 }
70 break;
71 }
72 }
73 bcopy(src, dst, len);
74 }
75
76 /*
77 * This routine is used for copying data across two kernel packets.
78 * Can also optionally compute 16-bit partial inet checksum as the
79 * data is copied.
80 * This routine is used by flowswitch while copying packet from vp
81 * adapter pool to packet in native netif pool and vice-a-versa.
82 *
83 * start/stuff is relative to soff, within [0, len], such that
84 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85 */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;
	/*
	 * Compute a software partial sum only when the caller asked for it
	 * and the source does not already carry full checksum flags.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet routine: whole copy must fit the first dst buffer */
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* record the partial sum in the destination */
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* plain copy; propagate the source RX csum metadata */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * NOTE(review): if len < sizeof(csum) the unsigned
			 * subtraction below wraps and this ASSERT cannot
			 * fire; callers presumably guarantee len >= 2 --
			 * verify at call sites.
			 */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* only TSO-related flags survive the copy on TX */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
200
/*
 * NOTE: soff is the offset within the packet.
 * The accumulated partial sum (32-bit) is returned to the caller as this
 * function's return value; the caller is responsible for further reducing
 * it to 16-bit if needed, as well as for performing the final 1's
 * complement on it.
 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;		/* originals kept for panic messages */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry the odd/even byte phase across calls, if requested */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		/*
		 * NOTE(review): clen is clamped to the buflet; if len were
		 * larger than the single buflet's remaining bytes, only the
		 * in-buflet portion would be handled here -- presumably
		 * callers guarantee len fits, verify at call sites.
		 */
		clen = (uint16_t)MIN(len, sblen);

		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			/* fastpath returns the sum already folded to 16-bit */
			return __packet_fold_sum(sum);
		}

		/* not eligible for the fastpath; restart via the loop */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
		soff = 0;	/* the offset applies to the first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
			}
			needs_swap = started_on_odd;

			/* sum an even number of bytes; odd tail handled below */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* fold early so a later byte swap cannot overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* copy and accumulate the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		len -= clen + odd;
		/* either the buflet is drained or the request is satisfied */
		ASSERT(sblen == 0 || len == 0);
	}

	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
343
344 /*
345 * NOTE: Caller of this function is responsible to adjust the length and offset
346 * of the first buflet of the destination packet if (doff != 0),
347 * i.e. additional data is being prependend to the packet.
348 * It should also finalize the packet.
349 * To simplify & optimize the routine, we have also assumed that soff & doff
350 * will lie within the first buffer, which is true for the current use cases
351 * where, doff is the offset of the checksum field in the TCP/IP header and
352 * soff is the L3 offset.
353 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
354 * caller is responsible for further reducing it to 16-bit if needed,
355 * as well as to perform the final 1's complement on it.
356 */
357 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)358 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
359 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
360 {
361 uint8_t odd = 0;
362 uint32_t sum = 0, partial;
363 boolean_t needs_swap, started_on_odd = FALSE;
364 uint8_t *sbaddr = NULL, *dbaddr = NULL;
365 uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
366 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
367 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
368 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
369
370 ASSERT(csum_partial != NULL || !do_csum);
371 sbcnt = __packet_get_buflet_count(sph);
372 dbcnt = __packet_get_buflet_count(dph);
373
374 while (len != 0) {
375 ASSERT(sbaddr == NULL || dbaddr == NULL);
376 if (sbaddr == NULL) {
377 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
378 if (__improbable(sbuf == NULL)) {
379 break;
380 }
381 sbufp = sbuf;
382 sblen = __buflet_get_data_length(sbuf);
383 sboff = __buflet_get_data_offset(sbuf);
384 ASSERT(soff >= sboff);
385 ASSERT(sboff + sblen > soff);
386 sblen -= (soff - sboff);
387 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
388 soff = 0;
389 }
390
391 if (dbaddr == NULL) {
392 if (dbufp != NULL) {
393 __buflet_set_data_length(dbufp, dlen0);
394 }
395
396 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
397 if (__improbable(dbuf == NULL)) {
398 break;
399 }
400 dbufp = dbuf;
401 dlim = __buflet_get_data_limit(dbuf);
402 ASSERT(dlim > doff);
403 dlim -= doff;
404 if (doff != 0) {
405 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
406 }
407 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
408 dlen0 = dlim;
409 doff = 0;
410 }
411
412 clen = (uint16_t)MIN(len, sblen);
413 clen = MIN(clen, dlim);
414
415 if (__probable(do_csum)) {
416 partial = 0;
417 if (__improbable((uintptr_t)sbaddr & 1)) {
418 /* Align on word boundary */
419 started_on_odd = !started_on_odd;
420 #if BYTE_ORDER == LITTLE_ENDIAN
421 partial = (uint8_t)*sbaddr << 8;
422 #else /* BYTE_ORDER != LITTLE_ENDIAN */
423 partial = (uint8_t)*sbaddr;
424 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
425 *dbaddr++ = *sbaddr++;
426 clen -= 1;
427 dlim -= 1;
428 len -= 1;
429 }
430 needs_swap = started_on_odd;
431
432 odd = clen & 1u;
433 clen -= odd;
434
435 if (clen != 0) {
436 partial = __packet_copy_and_sum(sbaddr, dbaddr,
437 clen, partial);
438 }
439
440 if (__improbable(partial & 0xc0000000)) {
441 if (needs_swap) {
442 partial = (partial << 8) +
443 (partial >> 24);
444 }
445 sum += (partial >> 16);
446 sum += (partial & 0xffff);
447 partial = 0;
448 }
449 } else {
450 _pkt_copy(sbaddr, dbaddr, clen);
451 }
452 sbaddr += clen;
453 dbaddr += clen;
454
455 if (__probable(do_csum)) {
456 if (odd != 0) {
457 #if BYTE_ORDER == LITTLE_ENDIAN
458 partial += (uint8_t)*sbaddr;
459 #else /* BYTE_ORDER != LITTLE_ENDIAN */
460 partial += (uint8_t)*sbaddr << 8;
461 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
462 *dbaddr++ = *sbaddr++;
463 started_on_odd = !started_on_odd;
464 }
465
466 if (needs_swap) {
467 partial = (partial << 8) + (partial >> 24);
468 }
469 sum += (partial >> 16) + (partial & 0xffff);
470 /*
471 * Reduce sum to allow potential byte swap
472 * in the next iteration without carry.
473 */
474 sum = (sum >> 16) + (sum & 0xffff);
475 }
476
477 sblen -= clen + odd;
478 dlim -= clen + odd;
479 len -= clen + odd;
480
481 if (sblen == 0) {
482 sbaddr = NULL;
483 }
484
485 if (dlim == 0) {
486 dbaddr = NULL;
487 }
488 }
489
490 if (__probable(dbuf != NULL)) {
491 __buflet_set_data_length(dbuf, (dlen0 - dlim));
492 }
493 if (__probable(do_csum)) {
494 /* Final fold (reduce 32-bit to 16-bit) */
495 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
496 sum = (sum >> 16) + (sum & 0xffff);
497 *csum_partial = (uint32_t)sum;
498 }
499 return len == 0;
500 }
501
502 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)503 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
504 {
505 uint8_t odd = 0;
506 uint32_t sum = 0, partial;
507 boolean_t needs_swap, started_on_odd = FALSE;
508 uint8_t *sbaddr = NULL;
509 uint16_t clen, sblen, sbcnt, sboff;
510 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
511 kern_buflet_t sbuf = NULL, sbufp = NULL;
512
513 sbcnt = __packet_get_buflet_count(sph);
514
515 /* fastpath (single buflet, even aligned, even length) */
516 if (sbcnt == 1 && len != 0) {
517 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
518 ASSERT(sbuf != NULL);
519 sblen = __buflet_get_data_length(sbuf);
520 sboff = __buflet_get_data_offset(sbuf);
521 ASSERT(soff >= sboff);
522 ASSERT(sboff + sblen > soff);
523 sblen -= (soff - sboff);
524 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
525
526 clen = MIN(len, sblen);
527
528 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
529 sum = __packet_cksum(sbaddr, clen, 0);
530 return __packet_fold_sum(sum);
531 }
532
533 sbaddr = NULL;
534 sbuf = sbufp = NULL;
535 }
536
537 /* slowpath */
538 while (len != 0) {
539 ASSERT(sbaddr == NULL);
540 if (sbaddr == NULL) {
541 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
542 if (__improbable(sbuf == NULL)) {
543 break;
544 }
545 sbufp = sbuf;
546 sblen = __buflet_get_data_length(sbuf);
547 sboff = __buflet_get_data_offset(sbuf);
548 ASSERT(soff >= sboff);
549 ASSERT(sboff + sblen > soff);
550 sblen -= (soff - sboff);
551 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
552 soff = 0;
553 }
554
555 clen = MIN(len, sblen);
556
557 partial = 0;
558 if (__improbable((uintptr_t)sbaddr & 1)) {
559 /* Align on word boundary */
560 started_on_odd = !started_on_odd;
561 #if BYTE_ORDER == LITTLE_ENDIAN
562 partial = (uint8_t)*sbaddr << 8;
563 #else /* BYTE_ORDER != LITTLE_ENDIAN */
564 partial = (uint8_t)*sbaddr;
565 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
566 clen -= 1;
567 len -= 1;
568 }
569 needs_swap = started_on_odd;
570
571 odd = clen & 1u;
572 clen -= odd;
573
574 if (clen != 0) {
575 partial = __packet_cksum(sbaddr,
576 clen, partial);
577 }
578
579 if (__improbable(partial & 0xc0000000)) {
580 if (needs_swap) {
581 partial = (partial << 8) +
582 (partial >> 24);
583 }
584 sum += (partial >> 16);
585 sum += (partial & 0xffff);
586 partial = 0;
587 }
588 sbaddr += clen;
589
590 if (odd != 0) {
591 #if BYTE_ORDER == LITTLE_ENDIAN
592 partial += (uint8_t)*sbaddr;
593 #else /* BYTE_ORDER != LITTLE_ENDIAN */
594 partial += (uint8_t)*sbaddr << 8;
595 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
596 started_on_odd = !started_on_odd;
597 }
598
599 if (needs_swap) {
600 partial = (partial << 8) + (partial >> 24);
601 }
602 sum += (partial >> 16) + (partial & 0xffff);
603 /*
604 * Reduce sum to allow potential byte swap
605 * in the next iteration without carry.
606 */
607 sum = (sum >> 16) + (sum & 0xffff);
608
609 sblen -= clen + odd;
610 len -= clen + odd;
611
612 if (sblen == 0) {
613 sbaddr = NULL;
614 }
615 }
616
617 /* Final fold (reduce 32-bit to 16-bit) */
618 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
619 sum = (sum >> 16) + (sum & 0xffff);
620 return (uint32_t)sum;
621 }
622
623
624 /*
625 * This is a multi-buflet variant of pkt_copy_from_pkt().
626 *
627 * start/stuff is relative to soff, within [0, len], such that
628 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
629 */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	/*
	 * Compute a software partial sum only when the caller asked for it
	 * and the source does not already carry full checksum flags.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	/* destination capacity check across the whole buflet chain */
	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			/*
			 * NOTE(review): unlike the TX path below, the return
			 * value of this call is not checked -- verify that a
			 * short copy cannot happen here.
			 */
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * NOTE(review): adjusts metadata by "start" only --
			 * presumably _pkt_copypkt_sum() maintains the buflet
			 * lengths for the summed portion itself; confirm.
			 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			/* plain copy; propagate the source RX csum metadata */
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* only TSO-related flags survive the copy on TX */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
735
736 /*
737 * This routine is used for copying an mbuf which originated in the host
738 * stack destined to a native skywalk interface (NR_TX), as well as for
739 * mbufs originating on compat network interfaces (NR_RX).
740 *
741 * start/stuff is relative to moff, within [0, len], such that
742 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
743 */
void
pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet routine: whole copy must fit the first buffer */
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		/* import the mbuf's RX checksum state into the packet */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/* sum in software unless the mbuf has a full HW checksum */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the offset passed here is "start",
			 * not (moff + start) -- this is correct only when
			 * moff == 0 or when callers account for it; verify
			 * against the call sites.
			 */
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			m_copydata(m, moff, len, baddr);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/* NOTE(review): same moff-vs-start question as above */
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
		} else {
			m_copydata(m, moff, len, baddr);
		}
		/* TX checksum already inserted (or not requested) */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO markers (mutually exclusive v4/v6) */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		/* carry NECP policy identity over to the packet */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	METADATA_ADJUST_LEN(pkt, len, poff);

	/* preserve link-layer broadcast/multicast classification */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
936
937 /*
938 * Like m_copydata_sum(), but works on a destination kernel packet.
939 */
940 static inline uint32_t
m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
    uint32_t len, boolean_t do_cscum)
{
	/*
	 * Copy "len" bytes from the mbuf chain "m", starting at offset
	 * "soff", into the (possibly multi-buflet) packet "dph" starting
	 * at offset "doff", walking the source mbufs and the destination
	 * buflets in lock-step.  When "do_cscum" is TRUE, a 16-bit one's
	 * complement partial sum is accumulated over the copied bytes and
	 * returned folded to 16 bits; otherwise 0 is returned.
	 */
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = soff;		/* saved for diagnostics only */
	uint32_t len0 = len;		/* saved for diagnostics only */
	struct mbuf *m0 = m;		/* saved for diagnostics only */
	uint32_t sum = 0, partial;
	unsigned count0, count, odd, mlen_copied;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t dbuf = NULL, dbufp = NULL;

	/* advance to the mbuf containing the starting source offset */
	while (soff > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (soff < m->m_len) {
			break;
		}
		soff -= m->m_len;
		m = m->m_next;
	}

	if (__improbable(m == NULL)) {
		panic("%s: invalid mbuf chain %p [off %d, len %d]",
		    __func__, m0, off0, len0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	sbaddr = mtod(m, uint8_t *) + soff;
	count = m->m_len - soff;	/* bytes left in the current mbuf */
	mlen_copied = 0;

	while (len != 0) {
		/* at most one side (source or destination) is exhausted */
		ASSERT(sbaddr == NULL || dbaddr == NULL);

		/* source exhausted: advance to the next mbuf */
		if (sbaddr == NULL) {
			soff = 0;
			m = m->m_next;
			if (__improbable(m == NULL)) {
				panic("%s: invalid mbuf chain %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			sbaddr = mtod(m, uint8_t *);
			count = m->m_len;
			mlen_copied = 0;
		}

		if (__improbable(count == 0)) {
			/* zero-length mbuf; skip to the next one */
			sbaddr = NULL;
			continue;
		}

		/* destination exhausted: advance to the next buflet */
		if (dbaddr == NULL) {
			/* finalize the length of the buflet just filled */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				panic("%s: mbuf too large %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			dbufp = dbuf;
			/* "doff" applies only to the first buflet */
			dlim = __buflet_get_data_limit(dbuf) - doff;
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;
			doff = 0;
		}

		/* copy no more than both sides can accommodate */
		count = MIN(count, (unsigned)len);
		count0 = count = MIN(count, dlim);

		if (!do_cscum) {
			_pkt_copy(sbaddr, dbaddr, count);
			sbaddr += count;
			dbaddr += count;
			goto skip_csum;
		}

		/* copy + checksum; handle unaligned leading byte first */
		partial = 0;
		if ((uintptr_t)sbaddr & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			count -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			partial = __packet_copy_and_sum(sbaddr,
			    dbaddr, count, partial);
			sbaddr += count;
			dbaddr += count;
			/*
			 * Fold early when the 32-bit accumulator gets
			 * close to overflowing, so the byte swap below
			 * cannot lose carries.
			 */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}

		if (odd) {
			/* trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* rotate left by 8 to undo odd-byte misalignment */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

skip_csum:
		/* account for the bytes just consumed/produced */
		dlim -= count0;
		len -= count0;
		mlen_copied += count0;

		if (dlim == 0) {
			dbaddr = NULL;	/* current buflet is full */
		}

		count = m->m_len - soff - mlen_copied;
		if (count == 0) {
			sbaddr = NULL;	/* current mbuf is drained */
		}
	}

	ASSERT(len == 0);
	ASSERT(dbuf != NULL);
	/* set the data length of the last (possibly partial) buflet */
	__buflet_set_data_length(dbuf, (dlen0 - dlim));

	if (!do_cscum) {
		return 0;
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
1111
1112 /*
1113 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1114 *
1115 * start/stuff is relative to moff, within [0, len], such that
1116 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1117 */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* the copy must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		/* inherit the mbuf's receive checksum state */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/*
		 * Checksum in software only when the mbuf doesn't already
		 * carry a full checksum and the caller asked for one.
		 */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the mbuf offset passed below is
			 * "start" rather than "moff + start"; this appears
			 * to assume moff == 0 on this path -- confirm
			 * against the single-buflet pkt_copy_from_mbuf().
			 */
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/* account for the "start" bytes copied above */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): as in the RX case above, the mbuf
			 * offset is "start" rather than "moff + start".
			 */
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; store byte-wise */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			/* account for the "start" bytes copied above */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum has been handled above (or was not requested) */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO markers (mutually exclusive by family) */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		/* transfer any pending TX-completion callback request */
		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* carry over the link-layer broadcast/multicast indication */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1311
1312 /*
1313 * This routine is used for copying from a packet originating from a native
1314 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1315 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1316 *
1317 * Note that this routine does not alter m_data pointer of the mbuf, as the
1318 * caller may want to use the original value upon return. We do, however,
1319 * adjust the length to reflect the total data span.
1320 *
1321 * This routine supports copying into an mbuf chain for RX but not TX.
1322 *
1323 * start/stuff is relative to poff, within [0, len], such that
1324 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1325 */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;
	uint32_t partial = 0;
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;
	uint8_t *baddr;
	uint8_t *dp;
	/* skip software checksumming if the packet has a full checksum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet variant: all data lives in the default buffer */
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	ASSERT((m->m_flags & M_PKTHDR));
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/*
		 * Use pkt_copy() to copy the portion up to the
		 * point where we need to start the checksum, and
		 * copy the remainder, checksumming as we go.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* fill the mbuf chain, checksumming as we go if needed */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
			if (__probable(do_sum)) {
				partial = __packet_copy_and_sum((baddr + copied_len),
				    dp, tmp_len, partial);
			} else {
				_pkt_copy((baddr + copied_len), dp, tmp_len);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* the chain must have had room for the whole payload */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* record the partial checksum computed above */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* propagate the packet's existing checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		dp = (uint8_t *)m->m_data;
		/* TX does not support mbuf chains (see block comment) */
		ASSERT(m->m_next == NULL);

		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; store byte-wise */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		/* checksum has been handled above (or was not requested) */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* carry over the link-layer broadcast/multicast indication */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1518
1519 /*
1520 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1521 * NOTE: poff is the offset within the packet.
1522 *
1523 * This routine supports copying into an mbuf chain for RX but not TX.
1524 *
1525 * start/stuff is relative to poff, within [0, len], such that
1526 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1527 */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;
	uint32_t partial = 0;
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;
	uint8_t *baddr;
	uint8_t *dp;
	/* skip software checksumming if the packet has a full checksum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* the copy must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	ASSERT((m->m_flags & M_PKTHDR));
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		/*
		 * Copy the portion before "start" without checksumming;
		 * that prefix comes straight from the first buflet.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* fill the mbuf chain from the packet's buflets */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			uint16_t soff = poff + (uint16_t)copied_len;
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;

			if (__probable(do_sum)) {
				partial = _pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, TRUE, partial, NULL);
			} else {
				pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, FALSE, 0, NULL);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* the chain must have had room for the whole payload */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* record the partial checksum computed above */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* propagate the packet's existing checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;
	case NR_TX:
		dp = (uint8_t *)m->m_data;
		/* TX does not support mbuf chains (see block comment) */
		ASSERT(m->m_next == NULL);
		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; store byte-wise */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum has been handled above (or was not requested) */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* carry over the link-layer broadcast/multicast indication */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1715
1716 /*
1717 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1718 * Caller can provide an initial sum to be folded into the computed
1719 * sum. The accumulated partial sum (32-bit) is returned to caller;
1720 * caller is responsible for further reducing it to 16-bit if needed,
1721 * as well as to perform the final 1's complement on it.
1722 */
1723 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1724 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1725 boolean_t *odd_start)
1726 {
1727 boolean_t needs_swap, started_on_odd = FALSE;
1728 int off0 = off, len0 = len;
1729 struct mbuf *m0 = m;
1730 uint64_t sum, partial;
1731 unsigned count, odd;
1732 char *cp = vp;
1733
1734 if (__improbable(off < 0 || len < 0)) {
1735 panic("%s: invalid offset %d or len %d", __func__, off, len);
1736 /* NOTREACHED */
1737 __builtin_unreachable();
1738 }
1739
1740 while (off > 0) {
1741 if (__improbable(m == NULL)) {
1742 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1743 __func__, m0, off0, len0);
1744 /* NOTREACHED */
1745 __builtin_unreachable();
1746 }
1747 if (off < m->m_len) {
1748 break;
1749 }
1750 off -= m->m_len;
1751 m = m->m_next;
1752 }
1753
1754 if (odd_start) {
1755 started_on_odd = *odd_start;
1756 }
1757 sum = initial_sum;
1758
1759 for (; len > 0; m = m->m_next) {
1760 uint8_t *datap;
1761
1762 if (__improbable(m == NULL)) {
1763 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1764 __func__, m0, off0, len0);
1765 /* NOTREACHED */
1766 __builtin_unreachable();
1767 }
1768
1769 datap = mtod(m, uint8_t *) + off;
1770 count = m->m_len;
1771
1772 if (__improbable(count == 0)) {
1773 continue;
1774 }
1775
1776 count = MIN(count - off, (unsigned)len);
1777 partial = 0;
1778
1779 if ((uintptr_t)datap & 1) {
1780 /* Align on word boundary */
1781 started_on_odd = !started_on_odd;
1782 #if BYTE_ORDER == LITTLE_ENDIAN
1783 partial = *datap << 8;
1784 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1785 partial = *datap;
1786 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1787 *cp++ = *datap++;
1788 count -= 1;
1789 len -= 1;
1790 }
1791
1792 needs_swap = started_on_odd;
1793 odd = count & 1u;
1794 count -= odd;
1795
1796 if (count) {
1797 partial = __packet_copy_and_sum(datap,
1798 cp, count, (uint32_t)partial);
1799 datap += count;
1800 cp += count;
1801 len -= count;
1802 if (__improbable((partial & (3ULL << 62)) != 0)) {
1803 if (needs_swap) {
1804 partial = (partial << 8) +
1805 (partial >> 56);
1806 }
1807 sum += (partial >> 32);
1808 sum += (partial & 0xffffffff);
1809 partial = 0;
1810 }
1811 }
1812
1813 if (odd) {
1814 #if BYTE_ORDER == LITTLE_ENDIAN
1815 partial += *datap;
1816 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1817 partial += *datap << 8;
1818 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1819 *cp++ = *datap++;
1820 len -= 1;
1821 started_on_odd = !started_on_odd;
1822 }
1823 off = 0;
1824
1825 if (needs_swap) {
1826 partial = (partial << 8) + (partial >> 24);
1827 }
1828 sum += (partial >> 32) + (partial & 0xffffffff);
1829 /*
1830 * Reduce sum to allow potential byte swap
1831 * in the next iteration without carry.
1832 */
1833 sum = (sum >> 32) + (sum & 0xffffffff);
1834 }
1835
1836 if (odd_start) {
1837 *odd_start = started_on_odd;
1838 }
1839
1840 /* Final fold (reduce 64-bit to 32-bit) */
1841 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1842 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1843
1844 /* return 32-bit partial sum to caller */
1845 return (uint32_t)sum;
1846 }
1847
#if DEBUG || DEVELOPMENT
/*
 * Test support: append a few random trailing bytes to packets/mbufs,
 * enabled via the pkt_trailers sysctl declared at the top of this file.
 */
#define TRAILERS_MAX    16              /* max trailing bytes */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1853
1854 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1855 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1856 {
1857 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1858 uint32_t extra;
1859 uint8_t *baddr;
1860
1861 /* get buffer address from packet */
1862 MD_BUFLET_ADDR_ABS(pkt, baddr);
1863 ASSERT(baddr != NULL);
1864 ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1865
1866 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1867 if (extra == 0 || extra > sizeof(tb) ||
1868 (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1869 return 0;
1870 }
1871
1872 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1873 if (regen++ == TRAILERS_REGEN) {
1874 read_frandom(&tb[0], sizeof(tb));
1875 regen = 0;
1876 }
1877
1878 bcopy(&tb[0], (baddr + len), extra);
1879
1880 /* recompute partial sum (also to exercise related logic) */
1881 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1882 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1883 ((len + extra) - start), 0);
1884 pkt->pkt_csum_rx_start_off = start;
1885
1886 return extra;
1887 }
1888
1889 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1890 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1891 {
1892 uint32_t extra;
1893
1894 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1895 if (extra == 0 || extra > sizeof(tb)) {
1896 return 0;
1897 }
1898
1899 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1900 return 0;
1901 }
1902
1903 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1904 if (regen++ == TRAILERS_REGEN) {
1905 read_frandom(&tb[0], sizeof(tb));
1906 regen = 0;
1907 }
1908
1909 /* recompute partial sum (also to exercise related logic) */
1910 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1911 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1912 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1913 m->m_pkthdr.csum_rx_start = start;
1914
1915 return extra;
1916 }
1917 #endif /* DEBUG || DEVELOPMENT */
1918
1919 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)1920 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
1921 uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
1922 {
1923 VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
1924 }
1925
1926 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1927 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1928 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1929 {
1930 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1931 }
1932
1933 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1934 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1935 uint16_t len, boolean_t do_cscum)
1936 {
1937 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1938 }
1939
1940 void
pkt_copy(void * src,void * dst,size_t len)1941 pkt_copy(void *src, void *dst, size_t len)
1942 {
1943 return _pkt_copy(src, dst, len);
1944 }
1945