1 /*
2 * Copyright (c) 2017-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
#if (DEVELOPMENT || DEBUG)
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
/*
 * Debug knob (kern.skywalk.packet.trailers): when non-zero, callers use
 * it to append test trailer bytes to packets.  Development/debug
 * builds only.
 */
int pkt_trailers = 0;   /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
40
41
42 __attribute__((always_inline))
43 static inline void
_pkt_copy(void * src,void * dst,size_t len)44 _pkt_copy(void *src, void *dst, size_t len)
45 {
46 if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
47 switch (len) {
48 case 20: /* standard IPv4 header */
49 sk_copy64_20(src, dst);
50 return;
51
52 case 40: /* IPv6 header */
53 sk_copy64_40(src, dst);
54 return;
55
56 default:
57 if (IS_P2ALIGNED(len, 64)) {
58 sk_copy64_64x(src, dst, len);
59 return;
60 } else if (IS_P2ALIGNED(len, 32)) {
61 sk_copy64_32x(src, dst, len);
62 return;
63 } else if (IS_P2ALIGNED(len, 8)) {
64 sk_copy64_8x(src, dst, len);
65 return;
66 } else if (IS_P2ALIGNED(len, 4)) {
67 sk_copy64_4x(src, dst, len);
68 return;
69 }
70 break;
71 }
72 }
73 bcopy(src, dst, len);
74 }
75
76 /*
77 * This routine is used for copying data across two kernel packets.
78 * Can also optionally compute 16-bit partial inet checksum as the
79 * data is copied.
80 * This routine is used by flowswitch while copying packet from vp
81 * adapter pool to packet in native netif pool and vice-a-versa.
82 *
83 * start/stuff is relative to soff, within [0, len], such that
84 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85 */
86 void
pkt_copy_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)87 pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
88 kern_packet_t sph, const uint16_t soff, const uint32_t len,
89 const boolean_t copysum, const uint16_t start, const uint16_t stuff,
90 const boolean_t invert)
91 {
92 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
93 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
94 uint32_t partial;
95 uint16_t csum = 0;
96 uint8_t *sbaddr, *dbaddr;
97
98 _CASSERT(sizeof(csum) == sizeof(uint16_t));
99
100 /* get buffer address from packet */
101 MD_BUFLET_ADDR_ABS(spkt, sbaddr);
102 ASSERT(sbaddr != NULL);
103 sbaddr += soff;
104 MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
105 ASSERT(dbaddr != NULL);
106 dbaddr += doff;
107 VERIFY((doff + len) <= dpkt->pkt_qum.qum_pp->pp_buflet_size);
108
109 switch (t) {
110 case NR_RX:
111 dpkt->pkt_csum_flags = 0;
112 if (__probable(copysum)) {
113 /*
114 * Use pkt_copy() to copy the portion up to the
115 * point where we need to start the checksum, and
116 * copy the remainder, checksumming as we go.
117 */
118 if (__probable(start != 0)) {
119 _pkt_copy(sbaddr, dbaddr, start);
120 }
121 partial = __packet_copy_and_sum((sbaddr + start),
122 (dbaddr + start), (len - start), 0);
123 csum = __packet_fold_sum(partial);
124
125 __packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
126 start, csum, FALSE);
127 } else {
128 _pkt_copy(sbaddr, dbaddr, len);
129 dpkt->pkt_csum_rx_start_off = 0;
130 dpkt->pkt_csum_rx_value = 0;
131 }
132
133 SK_DF(SK_VERB_COPY | SK_VERB_RX,
134 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
135 sk_proc_name_address(current_proc()),
136 sk_proc_pid(current_proc()), len,
137 (copysum ? (len - start) : 0), csum, start);
138 SK_DF(SK_VERB_COPY | SK_VERB_RX,
139 " pkt 0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
140 SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
141 (uint32_t)dpkt->pkt_csum_rx_start_off,
142 (uint32_t)dpkt->pkt_csum_rx_value);
143 break;
144
145 case NR_TX:
146 if (__probable(copysum)) {
147 /*
148 * Use pkt_copy() to copy the portion up to the
149 * point where we need to start the checksum, and
150 * copy the remainder, checksumming as we go.
151 */
152 if (__probable(start != 0)) {
153 _pkt_copy(sbaddr, dbaddr, start);
154 }
155 partial = __packet_copy_and_sum((sbaddr + start),
156 (dbaddr + start), (len - start), 0);
157 csum = __packet_fold_sum_final(partial);
158
159 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
160 if (csum == 0 && invert) {
161 csum = 0xffff;
162 }
163
164 /* Insert checksum into packet */
165 ASSERT(stuff <= (len - sizeof(csum)));
166 if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
167 *(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
168 } else {
169 bcopy((void *)&csum, dbaddr + stuff,
170 sizeof(csum));
171 }
172 } else {
173 _pkt_copy(sbaddr, dbaddr, len);
174 }
175 dpkt->pkt_csum_flags = 0;
176 dpkt->pkt_csum_tx_start_off = 0;
177 dpkt->pkt_csum_tx_stuff_off = 0;
178
179 SK_DF(SK_VERB_COPY | SK_VERB_TX,
180 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
181 sk_proc_name_address(current_proc()),
182 sk_proc_pid(current_proc()), len,
183 (copysum ? (len - start) : 0), csum, start);
184 break;
185
186 default:
187 VERIFY(0);
188 /* NOTREACHED */
189 __builtin_unreachable();
190 }
191 METADATA_ADJUST_LEN(dpkt, len, doff);
192
193 SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
194 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
195 (t == NR_RX) ? "RX" : "TX",
196 sk_dump("buf", dbaddr, len, 128, NULL, 0));
197 }
198
199 /*
200 * NOTE: soff is the offset within the packet
201 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
202 * caller is responsible for further reducing it to 16-bit if needed,
203 * as well as to perform the final 1's complement on it.
204 */
205 uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)206 _pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
207 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
208 {
209 uint8_t odd = 0;
210 uint8_t *sbaddr = NULL;
211 uint32_t sum = initial_sum, partial;
212 uint32_t len0 = len;
213 boolean_t needs_swap, started_on_odd = FALSE;
214 uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
215 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
216 kern_buflet_t sbuf = NULL, sbufp = NULL;
217
218 sbcnt = __packet_get_buflet_count(sph);
219
220 if (odd_start) {
221 started_on_odd = *odd_start;
222 }
223
224 /* fastpath (copy+sum, single buflet, even aligned, even length) */
225 if (do_csum && sbcnt == 1 && len != 0) {
226 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
227 ASSERT(sbuf != NULL);
228 sboff = __buflet_get_data_offset(sbuf);
229 sblen = __buflet_get_data_length(sbuf);
230 ASSERT(sboff <= soff);
231 ASSERT(soff < sboff + sblen);
232 sblen -= (soff - sboff);
233 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
234
235 clen = (uint16_t)MIN(len, sblen);
236
237 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
238 sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
239 return __packet_fold_sum(sum);
240 }
241
242 sbaddr = NULL;
243 sbuf = sbufp = NULL;
244 }
245
246 while (len != 0) {
247 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
248 if (__improbable(sbuf == NULL)) {
249 panic("%s: bad packet, 0x%llx [off %d, len %d]",
250 __func__, SK_KVA(spkt), off0, len0);
251 /* NOTREACHED */
252 __builtin_unreachable();
253 }
254 sbufp = sbuf;
255 sboff = __buflet_get_data_offset(sbuf);
256 sblen = __buflet_get_data_length(sbuf);
257 ASSERT((sboff <= soff) && (soff < sboff + sblen));
258 sblen -= (soff - sboff);
259 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
260 soff = 0;
261 clen = (uint16_t)MIN(len, sblen);
262 if (__probable(do_csum)) {
263 partial = 0;
264 if (__improbable((uintptr_t)sbaddr & 1)) {
265 /* Align on word boundary */
266 started_on_odd = !started_on_odd;
267 #if BYTE_ORDER == LITTLE_ENDIAN
268 partial = (uint8_t)*sbaddr << 8;
269 #else /* BYTE_ORDER != LITTLE_ENDIAN */
270 partial = (uint8_t)*sbaddr;
271 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
272 *dbaddr++ = *sbaddr++;
273 sblen -= 1;
274 clen -= 1;
275 len -= 1;
276 }
277 needs_swap = started_on_odd;
278
279 odd = clen & 1u;
280 clen -= odd;
281
282 if (clen != 0) {
283 partial = __packet_copy_and_sum(sbaddr, dbaddr,
284 clen, partial);
285 }
286
287 if (__improbable(partial & 0xc0000000)) {
288 if (needs_swap) {
289 partial = (partial << 8) +
290 (partial >> 24);
291 }
292 sum += (partial >> 16);
293 sum += (partial & 0xffff);
294 partial = 0;
295 }
296 } else {
297 _pkt_copy(sbaddr, dbaddr, clen);
298 }
299
300 dbaddr += clen;
301 sbaddr += clen;
302
303 if (__probable(do_csum)) {
304 if (odd != 0) {
305 #if BYTE_ORDER == LITTLE_ENDIAN
306 partial += (uint8_t)*sbaddr;
307 #else /* BYTE_ORDER != LITTLE_ENDIAN */
308 partial += (uint8_t)*sbaddr << 8;
309 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
310 *dbaddr++ = *sbaddr++;
311 started_on_odd = !started_on_odd;
312 }
313
314 if (needs_swap) {
315 partial = (partial << 8) + (partial >> 24);
316 }
317 sum += (partial >> 16) + (partial & 0xffff);
318 /*
319 * Reduce sum to allow potential byte swap
320 * in the next iteration without carry.
321 */
322 sum = (sum >> 16) + (sum & 0xffff);
323 }
324
325 sblen -= clen + odd;
326 len -= clen + odd;
327 ASSERT(sblen == 0 || len == 0);
328 }
329
330 if (odd_start) {
331 *odd_start = started_on_odd;
332 }
333
334 if (__probable(do_csum)) {
335 /* Final fold (reduce 32-bit to 16-bit) */
336 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
337 sum = (sum >> 16) + (sum & 0xffff);
338 }
339 return sum;
340 }
341
342 /*
343 * NOTE: Caller of this function is responsible to adjust the length and offset
344 * of the first buflet of the destination packet if (doff != 0),
345 * i.e. additional data is being prependend to the packet.
346 * It should also finalize the packet.
347 * To simplify & optimize the routine, we have also assumed that soff & doff
348 * will lie within the first buffer, which is true for the current use cases
349 * where, doff is the offset of the checksum field in the TCP/IP header and
350 * soff is the L3 offset.
351 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
352 * caller is responsible for further reducing it to 16-bit if needed,
353 * as well as to perform the final 1's complement on it.
354 */
355 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)356 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
357 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
358 {
359 uint8_t odd = 0;
360 uint32_t sum = 0, partial;
361 boolean_t needs_swap, started_on_odd = FALSE;
362 uint8_t *sbaddr = NULL, *dbaddr = NULL;
363 uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
364 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
365 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
366 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
367
368 ASSERT(csum_partial != NULL || !do_csum);
369 sbcnt = __packet_get_buflet_count(sph);
370 dbcnt = __packet_get_buflet_count(dph);
371
372 while (len != 0) {
373 ASSERT(sbaddr == NULL || dbaddr == NULL);
374 if (sbaddr == NULL) {
375 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
376 if (__improbable(sbuf == NULL)) {
377 break;
378 }
379 sbufp = sbuf;
380 sblen = __buflet_get_data_length(sbuf);
381 sboff = __buflet_get_data_offset(sbuf);
382 ASSERT(soff >= sboff);
383 ASSERT(sboff + sblen > soff);
384 sblen -= (soff - sboff);
385 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
386 soff = 0;
387 }
388
389 if (dbaddr == NULL) {
390 if (dbufp != NULL) {
391 __buflet_set_data_length(dbufp, dlen0);
392 }
393
394 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
395 if (__improbable(dbuf == NULL)) {
396 break;
397 }
398 dbufp = dbuf;
399 dlim = __buflet_get_data_limit(dbuf);
400 ASSERT(dlim > doff);
401 dlim -= doff;
402 if (doff != 0) {
403 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
404 }
405 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
406 dlen0 = dlim;
407 doff = 0;
408 }
409
410 clen = (uint16_t)MIN(len, sblen);
411 clen = MIN(clen, dlim);
412
413 if (__probable(do_csum)) {
414 partial = 0;
415 if (__improbable((uintptr_t)sbaddr & 1)) {
416 /* Align on word boundary */
417 started_on_odd = !started_on_odd;
418 #if BYTE_ORDER == LITTLE_ENDIAN
419 partial = (uint8_t)*sbaddr << 8;
420 #else /* BYTE_ORDER != LITTLE_ENDIAN */
421 partial = (uint8_t)*sbaddr;
422 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
423 *dbaddr++ = *sbaddr++;
424 clen -= 1;
425 dlim -= 1;
426 len -= 1;
427 }
428 needs_swap = started_on_odd;
429
430 odd = clen & 1u;
431 clen -= odd;
432
433 if (clen != 0) {
434 partial = __packet_copy_and_sum(sbaddr, dbaddr,
435 clen, partial);
436 }
437
438 if (__improbable(partial & 0xc0000000)) {
439 if (needs_swap) {
440 partial = (partial << 8) +
441 (partial >> 24);
442 }
443 sum += (partial >> 16);
444 sum += (partial & 0xffff);
445 partial = 0;
446 }
447 } else {
448 _pkt_copy(sbaddr, dbaddr, clen);
449 }
450 sbaddr += clen;
451 dbaddr += clen;
452
453 if (__probable(do_csum)) {
454 if (odd != 0) {
455 #if BYTE_ORDER == LITTLE_ENDIAN
456 partial += (uint8_t)*sbaddr;
457 #else /* BYTE_ORDER != LITTLE_ENDIAN */
458 partial += (uint8_t)*sbaddr << 8;
459 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
460 *dbaddr++ = *sbaddr++;
461 started_on_odd = !started_on_odd;
462 }
463
464 if (needs_swap) {
465 partial = (partial << 8) + (partial >> 24);
466 }
467 sum += (partial >> 16) + (partial & 0xffff);
468 /*
469 * Reduce sum to allow potential byte swap
470 * in the next iteration without carry.
471 */
472 sum = (sum >> 16) + (sum & 0xffff);
473 }
474
475 sblen -= clen + odd;
476 dlim -= clen + odd;
477 len -= clen + odd;
478
479 if (sblen == 0) {
480 sbaddr = NULL;
481 }
482
483 if (dlim == 0) {
484 dbaddr = NULL;
485 }
486 }
487
488 if (__probable(dbuf != NULL)) {
489 __buflet_set_data_length(dbuf, (dlen0 - dlim));
490 }
491 if (__probable(do_csum)) {
492 /* Final fold (reduce 32-bit to 16-bit) */
493 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
494 sum = (sum >> 16) + (sum & 0xffff);
495 *csum_partial = (uint32_t)sum;
496 }
497 return len == 0;
498 }
499
500 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)501 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
502 {
503 uint8_t odd = 0;
504 uint32_t sum = 0, partial;
505 boolean_t needs_swap, started_on_odd = FALSE;
506 uint8_t *sbaddr = NULL;
507 uint16_t clen, sblen, sbcnt, sboff;
508 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
509 kern_buflet_t sbuf = NULL, sbufp = NULL;
510
511 sbcnt = __packet_get_buflet_count(sph);
512
513 /* fastpath (single buflet, even aligned, even length) */
514 if (sbcnt == 1 && len != 0) {
515 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
516 ASSERT(sbuf != NULL);
517 sblen = __buflet_get_data_length(sbuf);
518 sboff = __buflet_get_data_offset(sbuf);
519 ASSERT(soff >= sboff);
520 ASSERT(sboff + sblen > soff);
521 sblen -= (soff - sboff);
522 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
523
524 clen = MIN(len, sblen);
525
526 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
527 sum = __packet_cksum(sbaddr, clen, 0);
528 return __packet_fold_sum(sum);
529 }
530
531 sbaddr = NULL;
532 sbuf = sbufp = NULL;
533 }
534
535 /* slowpath */
536 while (len != 0) {
537 ASSERT(sbaddr == NULL);
538 if (sbaddr == NULL) {
539 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
540 if (__improbable(sbuf == NULL)) {
541 break;
542 }
543 sbufp = sbuf;
544 sblen = __buflet_get_data_length(sbuf);
545 sboff = __buflet_get_data_offset(sbuf);
546 ASSERT(soff >= sboff);
547 ASSERT(sboff + sblen > soff);
548 sblen -= (soff - sboff);
549 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
550 soff = 0;
551 }
552
553 clen = MIN(len, sblen);
554
555 partial = 0;
556 if (__improbable((uintptr_t)sbaddr & 1)) {
557 /* Align on word boundary */
558 started_on_odd = !started_on_odd;
559 #if BYTE_ORDER == LITTLE_ENDIAN
560 partial = (uint8_t)*sbaddr << 8;
561 #else /* BYTE_ORDER != LITTLE_ENDIAN */
562 partial = (uint8_t)*sbaddr;
563 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
564 clen -= 1;
565 len -= 1;
566 }
567 needs_swap = started_on_odd;
568
569 odd = clen & 1u;
570 clen -= odd;
571
572 if (clen != 0) {
573 partial = __packet_cksum(sbaddr,
574 clen, partial);
575 }
576
577 if (__improbable(partial & 0xc0000000)) {
578 if (needs_swap) {
579 partial = (partial << 8) +
580 (partial >> 24);
581 }
582 sum += (partial >> 16);
583 sum += (partial & 0xffff);
584 partial = 0;
585 }
586 sbaddr += clen;
587
588 if (odd != 0) {
589 #if BYTE_ORDER == LITTLE_ENDIAN
590 partial += (uint8_t)*sbaddr;
591 #else /* BYTE_ORDER != LITTLE_ENDIAN */
592 partial += (uint8_t)*sbaddr << 8;
593 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
594 started_on_odd = !started_on_odd;
595 }
596
597 if (needs_swap) {
598 partial = (partial << 8) + (partial >> 24);
599 }
600 sum += (partial >> 16) + (partial & 0xffff);
601 /*
602 * Reduce sum to allow potential byte swap
603 * in the next iteration without carry.
604 */
605 sum = (sum >> 16) + (sum & 0xffff);
606
607 sblen -= clen + odd;
608 len -= clen + odd;
609
610 if (sblen == 0) {
611 sbaddr = NULL;
612 }
613 }
614
615 /* Final fold (reduce 32-bit to 16-bit) */
616 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
617 sum = (sum >> 16) + (sum & 0xffff);
618 return (uint32_t)sum;
619 }
620
621
622 /*
623 * This is a multi-buflet variant of pkt_copy_from_pkt().
624 *
625 * start/stuff is relative to soff, within [0, len], such that
626 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
627 */
628 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)629 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
630 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
631 const uint32_t len, const boolean_t copysum, const uint16_t start,
632 const uint16_t stuff, const boolean_t invert)
633 {
634 boolean_t rc;
635 uint32_t partial;
636 uint16_t csum = 0;
637 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
638
639 VERIFY((doff + len) <=
640 (dpkt->pkt_qum.qum_pp->pp_buflet_size *
641 __packet_get_buflet_count(dph)));
642
643 switch (t) {
644 case NR_RX:
645 dpkt->pkt_csum_flags = 0;
646 if (__probable(copysum)) {
647 /*
648 * copy the portion up to the point where we need to
649 * start the checksum, and copy the remainder,
650 * checksumming as we go.
651 */
652 if (__probable(start != 0)) {
653 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
654 start, NULL, FALSE);
655 ASSERT(rc);
656 }
657 _pkt_copypkt_sum(sph, (soff + start), dph,
658 (doff + start), (len - start), &partial, TRUE);
659 csum = __packet_fold_sum(partial);
660 __packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
661 start, csum, FALSE);
662 METADATA_ADJUST_LEN(dpkt, start, doff);
663 } else {
664 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
665 FALSE);
666 ASSERT(rc);
667 dpkt->pkt_csum_rx_start_off = 0;
668 dpkt->pkt_csum_rx_value = 0;
669 }
670 break;
671
672 case NR_TX:
673 if (__probable(copysum)) {
674 uint8_t *baddr;
675 /*
676 * copy the portion up to the point where we need to
677 * start the checksum, and copy the remainder,
678 * checksumming as we go.
679 */
680 if (__probable(start != 0)) {
681 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
682 start, NULL, FALSE);
683 ASSERT(rc);
684 }
685 rc = _pkt_copypkt_sum(sph, (soff + start), dph,
686 (doff + start), (len - start), &partial, TRUE);
687 ASSERT(rc);
688 csum = __packet_fold_sum_final(partial);
689
690 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
691 if (csum == 0 && invert) {
692 csum = 0xffff;
693 }
694
695 /*
696 * Insert checksum into packet.
697 * Here we assume that checksum will be in the
698 * first buffer.
699 */
700 ASSERT((stuff + doff + sizeof(csum)) <=
701 dpkt->pkt_qum.qum_pp->pp_buflet_size);
702 ASSERT(stuff <= (len - sizeof(csum)));
703
704 /* get first buflet buffer address from packet */
705 MD_BUFLET_ADDR_ABS(dpkt, baddr);
706 ASSERT(baddr != NULL);
707 baddr += doff;
708 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
709 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
710 } else {
711 bcopy((void *)&csum, baddr + stuff,
712 sizeof(csum));
713 }
714 METADATA_ADJUST_LEN(dpkt, start, doff);
715 } else {
716 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
717 FALSE);
718 ASSERT(rc);
719 }
720 dpkt->pkt_csum_flags = 0;
721 dpkt->pkt_csum_tx_start_off = 0;
722 dpkt->pkt_csum_tx_stuff_off = 0;
723 break;
724
725 default:
726 VERIFY(0);
727 /* NOTREACHED */
728 __builtin_unreachable();
729 }
730 }
731
732 /*
733 * This routine is used for copying an mbuf which originated in the host
734 * stack destined to a native skywalk interface (NR_TX), as well as for
735 * mbufs originating on compat network interfaces (NR_RX).
736 *
737 * start/stuff is relative to moff, within [0, len], such that
738 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
739 */
740 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)741 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
742 struct mbuf *m, const uint16_t moff, const uint32_t len,
743 const boolean_t copysum, const uint16_t start)
744 {
745 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
746 uint32_t partial;
747 uint16_t csum = 0;
748 uint8_t *baddr;
749
750 _CASSERT(sizeof(csum) == sizeof(uint16_t));
751
752 /* get buffer address from packet */
753 MD_BUFLET_ADDR_ABS(pkt, baddr);
754 ASSERT(baddr != NULL);
755 baddr += poff;
756 VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);
757
758 switch (t) {
759 case NR_RX:
760 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
761 pkt->pkt_csum_rx_start_off = 0;
762 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
763 pkt->pkt_svc_class = m_get_service_class(m);
764 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
765 != CSUM_RX_FULL_FLAGS) && copysum)) {
766 /*
767 * Use m_copydata() to copy the portion up to the
768 * point where we need to start the checksum, and
769 * copy the remainder, checksumming as we go.
770 */
771 if (start != 0) {
772 m_copydata(m, moff, start, baddr);
773 }
774 partial = m_copydata_sum(m, start, (len - start),
775 (baddr + start), 0, NULL);
776 csum = __packet_fold_sum(partial);
777
778 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
779 start, csum, FALSE);
780 } else {
781 m_copydata(m, moff, len, baddr);
782 }
783 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
784 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
785 sk_proc_name_address(current_proc()),
786 sk_proc_pid(current_proc()), len,
787 (copysum ? (len - start) : 0), csum, start);
788 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
789 " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
790 SK_KVA(m), m->m_pkthdr.csum_flags,
791 (uint32_t)m->m_pkthdr.csum_rx_start,
792 (uint32_t)m->m_pkthdr.csum_rx_val);
793 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
794 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
795 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
796 (uint32_t)pkt->pkt_csum_rx_start_off,
797 (uint32_t)pkt->pkt_csum_rx_value);
798 break;
799
800 case NR_TX:
801 if (__probable(copysum)) {
802 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
803 /*
804 * Use m_copydata() to copy the portion up to the
805 * point where we need to start the checksum, and
806 * copy the remainder, checksumming as we go.
807 */
808 if (start != 0) {
809 m_copydata(m, moff, start, baddr);
810 }
811 partial = m_copydata_sum(m, start, (len - start),
812 (baddr + start), 0, NULL);
813 csum = __packet_fold_sum_final(partial);
814
815 /*
816 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
817 * ideally we'd only test for CSUM_ZERO_INVERT
818 * here, but catch cases where the originator
819 * did not set it for UDP.
820 */
821 if (csum == 0 && (m->m_pkthdr.csum_flags &
822 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
823 csum = 0xffff;
824 }
825
826 /* Insert checksum into packet */
827 ASSERT(stuff <= (len - sizeof(csum)));
828 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
829 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
830 } else {
831 bcopy((void *)&csum, baddr + stuff,
832 sizeof(csum));
833 }
834 } else {
835 m_copydata(m, moff, len, baddr);
836 }
837 pkt->pkt_csum_flags = 0;
838 pkt->pkt_csum_tx_start_off = 0;
839 pkt->pkt_csum_tx_stuff_off = 0;
840
841 /* translate mbuf metadata */
842 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
843 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
844 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
845 switch (m->m_pkthdr.pkt_proto) {
846 case IPPROTO_QUIC:
847 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
848 pkt->pkt_transport_protocol = IPPROTO_QUIC;
849 break;
850
851 default:
852 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
853 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
854 break;
855 }
856 (void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
857 pkt->pkt_svc_class = m_get_service_class(m);
858 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
859 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
860 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
861 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
862 }
863 necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
864 pkt->pkt_policy_id =
865 (uint32_t)necp_get_policy_id_from_packet(m);
866
867 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
868 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
869 __packet_set_tx_completion_data(ph,
870 m->m_pkthdr.drv_tx_compl_arg,
871 m->m_pkthdr.drv_tx_compl_data);
872 }
873 pkt->pkt_tx_compl_context =
874 m->m_pkthdr.pkt_compl_context;
875 pkt->pkt_tx_compl_callbacks =
876 m->m_pkthdr.pkt_compl_callbacks;
877 /*
878 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
879 * mbuf can no longer trigger a completion callback.
880 * callback will be invoked when the kernel packet is
881 * completed.
882 */
883 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
884
885 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
886 }
887
888 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
889 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
890 sk_proc_name_address(current_proc()),
891 sk_proc_pid(current_proc()), len,
892 (copysum ? (len - start) : 0), csum, start);
893 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
894 " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
895 SK_KVA(m), m->m_pkthdr.csum_flags,
896 (uint32_t)m->m_pkthdr.csum_tx_start,
897 (uint32_t)m->m_pkthdr.csum_tx_stuff);
898 break;
899
900 default:
901 VERIFY(0);
902 /* NOTREACHED */
903 __builtin_unreachable();
904 }
905 METADATA_ADJUST_LEN(pkt, len, poff);
906
907 if (m->m_flags & M_BCAST) {
908 __packet_set_link_broadcast(ph);
909 } else if (m->m_flags & M_MCAST) {
910 __packet_set_link_multicast(ph);
911 }
912
913 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
914 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
915 (t == NR_RX) ? "RX" : "TX",
916 sk_dump("buf", baddr, len, 128, NULL, 0));
917 }
918
919 /*
920 * Like m_copydata_sum(), but works on a destination kernel packet.
921 */
922 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)923 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
924 uint32_t len, boolean_t do_cscum)
925 {
926 boolean_t needs_swap, started_on_odd = FALSE;
927 int off0 = soff;
928 uint32_t len0 = len;
929 struct mbuf *m0 = m;
930 uint32_t sum = 0, partial;
931 unsigned count0, count, odd, mlen_copied;
932 uint8_t *sbaddr = NULL, *dbaddr = NULL;
933 uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
934 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
935 kern_buflet_t dbuf = NULL, dbufp = NULL;
936
937 while (soff > 0) {
938 if (__improbable(m == NULL)) {
939 panic("%s: invalid mbuf chain %p [off %d, len %d]",
940 __func__, m0, off0, len0);
941 /* NOTREACHED */
942 __builtin_unreachable();
943 }
944 if (soff < m->m_len) {
945 break;
946 }
947 soff -= m->m_len;
948 m = m->m_next;
949 }
950
951 if (__improbable(m == NULL)) {
952 panic("%s: invalid mbuf chain %p [off %d, len %d]",
953 __func__, m0, off0, len0);
954 /* NOTREACHED */
955 __builtin_unreachable();
956 }
957
958 sbaddr = mtod(m, uint8_t *) + soff;
959 count = m->m_len - soff;
960 mlen_copied = 0;
961
962 while (len != 0) {
963 ASSERT(sbaddr == NULL || dbaddr == NULL);
964 if (sbaddr == NULL) {
965 soff = 0;
966 m = m->m_next;
967 if (__improbable(m == NULL)) {
968 panic("%s: invalid mbuf chain %p [off %d, "
969 "len %d]", __func__, m0, off0, len0);
970 /* NOTREACHED */
971 __builtin_unreachable();
972 }
973 sbaddr = mtod(m, uint8_t *);
974 count = m->m_len;
975 mlen_copied = 0;
976 }
977
978 if (__improbable(count == 0)) {
979 sbaddr = NULL;
980 continue;
981 }
982
983 if (dbaddr == NULL) {
984 if (dbufp != NULL) {
985 __buflet_set_data_length(dbufp, dlen0);
986 }
987
988 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
989 if (__improbable(dbuf == NULL)) {
990 panic("%s: mbuf too large %p [off %d, "
991 "len %d]", __func__, m0, off0, len0);
992 /* NOTREACHED */
993 __builtin_unreachable();
994 }
995 dbufp = dbuf;
996 dlim = __buflet_get_data_limit(dbuf) - doff;
997 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
998 dlen0 = dlim;
999 doff = 0;
1000 }
1001
1002 count = MIN(count, (unsigned)len);
1003 count0 = count = MIN(count, dlim);
1004
1005 if (!do_cscum) {
1006 _pkt_copy(sbaddr, dbaddr, count);
1007 sbaddr += count;
1008 dbaddr += count;
1009 goto skip_csum;
1010 }
1011
1012 partial = 0;
1013 if ((uintptr_t)sbaddr & 1) {
1014 /* Align on word boundary */
1015 started_on_odd = !started_on_odd;
1016 #if BYTE_ORDER == LITTLE_ENDIAN
1017 partial = *sbaddr << 8;
1018 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1019 partial = *sbaddr;
1020 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1021 *dbaddr++ = *sbaddr++;
1022 count -= 1;
1023 }
1024
1025 needs_swap = started_on_odd;
1026 odd = count & 1u;
1027 count -= odd;
1028
1029 if (count) {
1030 partial = __packet_copy_and_sum(sbaddr,
1031 dbaddr, count, partial);
1032 sbaddr += count;
1033 dbaddr += count;
1034 if (__improbable(partial & 0xc0000000)) {
1035 if (needs_swap) {
1036 partial = (partial << 8) +
1037 (partial >> 24);
1038 }
1039 sum += (partial >> 16);
1040 sum += (partial & 0xffff);
1041 partial = 0;
1042 }
1043 }
1044
1045 if (odd) {
1046 #if BYTE_ORDER == LITTLE_ENDIAN
1047 partial += *sbaddr;
1048 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1049 partial += *sbaddr << 8;
1050 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1051 *dbaddr++ = *sbaddr++;
1052 started_on_odd = !started_on_odd;
1053 }
1054
1055 if (needs_swap) {
1056 partial = (partial << 8) + (partial >> 24);
1057 }
1058 sum += (partial >> 16) + (partial & 0xffff);
1059 /*
1060 * Reduce sum to allow potential byte swap
1061 * in the next iteration without carry.
1062 */
1063 sum = (sum >> 16) + (sum & 0xffff);
1064
1065 skip_csum:
1066 dlim -= count0;
1067 len -= count0;
1068 mlen_copied += count0;
1069
1070 if (dlim == 0) {
1071 dbaddr = NULL;
1072 }
1073
1074 count = m->m_len - soff - mlen_copied;
1075 if (count == 0) {
1076 sbaddr = NULL;
1077 }
1078 }
1079
1080 ASSERT(len == 0);
1081 ASSERT(dbuf != NULL);
1082 __buflet_set_data_length(dbuf, (dlen0 - dlim));
1083
1084 if (!do_cscum) {
1085 return 0;
1086 }
1087
1088 /* Final fold (reduce 32-bit to 16-bit) */
1089 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1090 sum = (sum >> 16) + (sum & 0xffff);
1091 return sum;
1092 }
1093
1094 /*
1095 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1096 *
1097 * start/stuff is relative to moff, within [0, len], such that
1098 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1099 */
1100 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1101 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1102 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1103 const uint32_t len, const boolean_t copysum, const uint16_t start)
1104 {
1105 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1106 uint32_t partial;
1107 uint16_t csum = 0;
1108 uint8_t *baddr;
1109
1110 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1111
1112 /* get buffer address from packet */
1113 MD_BUFLET_ADDR_ABS(pkt, baddr);
1114 ASSERT(baddr != NULL);
1115 baddr += poff;
1116 VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
1117 __packet_get_buflet_count(ph)));
1118
1119 switch (t) {
1120 case NR_RX:
1121 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1122 pkt->pkt_csum_rx_start_off = 0;
1123 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1124 pkt->pkt_svc_class = m_get_service_class(m);
1125 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1126 != CSUM_RX_FULL_FLAGS) && copysum)) {
1127 /*
1128 * Use m_copydata() to copy the portion up to the
1129 * point where we need to start the checksum, and
1130 * copy the remainder, checksumming as we go.
1131 */
1132 if (start != 0) {
1133 m_copydata(m, moff, start, baddr);
1134 }
1135 partial = m_copypkt_sum(m, start, ph, (poff + start),
1136 (len - start), TRUE);
1137 csum = __packet_fold_sum(partial);
1138 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1139 start, csum, FALSE);
1140 METADATA_ADJUST_LEN(pkt, start, poff);
1141 } else {
1142 (void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1143 }
1144 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1145 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1146 sk_proc_name_address(current_proc()),
1147 sk_proc_pid(current_proc()), len,
1148 (copysum ? (len - start) : 0), csum, start);
1149 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1150 " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
1151 SK_KVA(m), m->m_pkthdr.csum_flags,
1152 (uint32_t)m->m_pkthdr.csum_rx_start,
1153 (uint32_t)m->m_pkthdr.csum_rx_val);
1154 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1155 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1156 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1157 (uint32_t)pkt->pkt_csum_rx_start_off,
1158 (uint32_t)pkt->pkt_csum_rx_value);
1159 break;
1160
1161 case NR_TX:
1162 if (__probable(copysum)) {
1163 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1164 /*
1165 * Use m_copydata() to copy the portion up to the
1166 * point where we need to start the checksum, and
1167 * copy the remainder, checksumming as we go.
1168 */
1169 if (start != 0) {
1170 m_copydata(m, moff, start, baddr);
1171 }
1172 partial = m_copypkt_sum(m, start, ph, (poff + start),
1173 (len - start), TRUE);
1174 csum = __packet_fold_sum_final(partial);
1175
1176 /*
1177 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1178 * ideally we'd only test for CSUM_ZERO_INVERT
1179 * here, but catch cases where the originator
1180 * did not set it for UDP.
1181 */
1182 if (csum == 0 && (m->m_pkthdr.csum_flags &
1183 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1184 csum = 0xffff;
1185 }
1186
1187 /* Insert checksum into packet */
1188 ASSERT(stuff <= (len - sizeof(csum)));
1189 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1190 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1191 } else {
1192 bcopy((void *)&csum, baddr + stuff,
1193 sizeof(csum));
1194 }
1195 METADATA_ADJUST_LEN(pkt, start, poff);
1196 } else {
1197 m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1198 }
1199 pkt->pkt_csum_flags = 0;
1200 pkt->pkt_csum_tx_start_off = 0;
1201 pkt->pkt_csum_tx_stuff_off = 0;
1202
1203 /* translate mbuf metadata */
1204 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1205 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1206 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1207 switch (m->m_pkthdr.pkt_proto) {
1208 case IPPROTO_QUIC:
1209 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1210 pkt->pkt_transport_protocol = IPPROTO_QUIC;
1211 break;
1212
1213 default:
1214 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1215 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1216 break;
1217 }
1218 (void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1219 pkt->pkt_svc_class = m_get_service_class(m);
1220 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1221 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1222 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1223 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1224 }
1225 necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1226 pkt->pkt_policy_id =
1227 (uint32_t)necp_get_policy_id_from_packet(m);
1228
1229 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1230 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1231 __packet_set_tx_completion_data(ph,
1232 m->m_pkthdr.drv_tx_compl_arg,
1233 m->m_pkthdr.drv_tx_compl_data);
1234 }
1235 pkt->pkt_tx_compl_context =
1236 m->m_pkthdr.pkt_compl_context;
1237 pkt->pkt_tx_compl_callbacks =
1238 m->m_pkthdr.pkt_compl_callbacks;
1239 /*
1240 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1241 * mbuf can no longer trigger a completion callback.
1242 * callback will be invoked when the kernel packet is
1243 * completed.
1244 */
1245 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1246
1247 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1248 }
1249
1250 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1251 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1252 sk_proc_name_address(current_proc()),
1253 sk_proc_pid(current_proc()), len,
1254 (copysum ? (len - start) : 0), csum, start);
1255 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1256 " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
1257 SK_KVA(m), m->m_pkthdr.csum_flags,
1258 (uint32_t)m->m_pkthdr.csum_tx_start,
1259 (uint32_t)m->m_pkthdr.csum_tx_stuff);
1260 break;
1261
1262 default:
1263 VERIFY(0);
1264 /* NOTREACHED */
1265 __builtin_unreachable();
1266 }
1267
1268 if (m->m_flags & M_BCAST) {
1269 __packet_set_link_broadcast(ph);
1270 } else if (m->m_flags & M_MCAST) {
1271 __packet_set_link_multicast(ph);
1272 }
1273
1274 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1275 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1276 (t == NR_RX) ? "RX" : "TX",
1277 sk_dump("buf", baddr, len, 128, NULL, 0));
1278 }
1279
1280 /*
1281 * This routine is used for copying from a packet originating from a native
1282 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1283 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1284 *
1285 * Note that this routine does not alter m_data pointer of the mbuf, as the
1286 * caller may want to use the original value upon return. We do, however,
1287 * adjust the length to reflect the total data span.
1288 *
1289 * start/stuff is relative to poff, within [0, len], such that
1290 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1291 */
1292 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1293 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1294 struct mbuf *m, const uint16_t moff, const uint32_t len,
1295 const boolean_t copysum, const uint16_t start)
1296 {
1297 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1298 uint32_t partial;
1299 uint16_t csum = 0;
1300 uint8_t *baddr;
1301 uint8_t *dp;
1302
1303 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1304
1305 /* get buffer address from packet */
1306 MD_BUFLET_ADDR_ABS(pkt, baddr);
1307 ASSERT(baddr != NULL);
1308 baddr += poff;
1309 VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);
1310
1311 ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
1312 m->m_data += moff;
1313 dp = (uint8_t *)m->m_data;
1314 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1315 (uint32_t)mbuf_maxlen(m));
1316 m->m_len += len;
1317 m->m_pkthdr.len += len;
1318 VERIFY(m->m_len == m->m_pkthdr.len &&
1319 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1320
1321 switch (t) {
1322 case NR_RX:
1323 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1324 if (__probable(copysum)) {
1325 /*
1326 * Use pkt_copy() to copy the portion up to the
1327 * point where we need to start the checksum, and
1328 * copy the remainder, checksumming as we go.
1329 */
1330 if (__probable(start != 0)) {
1331 _pkt_copy(baddr, dp, start);
1332 }
1333 partial = __packet_copy_and_sum((baddr + start),
1334 (dp + start), (len - start), 0);
1335 csum = __packet_fold_sum(partial);
1336
1337 m->m_pkthdr.csum_flags |=
1338 (CSUM_DATA_VALID | CSUM_PARTIAL);
1339 m->m_pkthdr.csum_rx_start = start;
1340 m->m_pkthdr.csum_rx_val = csum;
1341 } else {
1342 _pkt_copy(baddr, dp, len);
1343 m->m_pkthdr.csum_rx_start = 0;
1344 m->m_pkthdr.csum_rx_val = 0;
1345 }
1346
1347 /* translate packet metadata */
1348 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1349 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1350
1351 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1352 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1353 sk_proc_name_address(current_proc()),
1354 sk_proc_pid(current_proc()), len,
1355 (copysum ? (len - start) : 0), csum, start);
1356 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1357 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1358 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1359 (uint32_t)m->m_pkthdr.csum_rx_start,
1360 (uint32_t)m->m_pkthdr.csum_rx_val);
1361 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1362 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1363 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1364 (uint32_t)pkt->pkt_csum_rx_start_off,
1365 (uint32_t)pkt->pkt_csum_rx_value);
1366 break;
1367
1368 case NR_TX:
1369 if (__probable(copysum)) {
1370 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1371 /*
1372 * Use pkt_copy() to copy the portion up to the
1373 * point where we need to start the checksum, and
1374 * copy the remainder, checksumming as we go.
1375 */
1376 if (__probable(start != 0)) {
1377 _pkt_copy(baddr, dp, start);
1378 }
1379 partial = __packet_copy_and_sum((baddr + start),
1380 (dp + start), (len - start), 0);
1381 csum = __packet_fold_sum_final(partial);
1382
1383 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1384 if (csum == 0 &&
1385 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1386 csum = 0xffff;
1387 }
1388
1389 /* Insert checksum into packet */
1390 ASSERT(stuff <= (len - sizeof(csum)));
1391 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1392 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1393 } else {
1394 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1395 }
1396 } else {
1397 _pkt_copy(baddr, dp, len);
1398 }
1399 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1400 m->m_pkthdr.csum_tx_start = 0;
1401 m->m_pkthdr.csum_tx_stuff = 0;
1402
1403 /* translate packet metadata */
1404 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1405 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1406 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1407 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1408 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1409 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1410 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1411 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1412 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1413 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1414 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1415 }
1416
1417 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1418 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1419 sk_proc_name_address(current_proc()),
1420 sk_proc_pid(current_proc()), len,
1421 (copysum ? (len - start) : 0), csum, start);
1422 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1423 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1424 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1425 (uint32_t)pkt->pkt_csum_tx_start_off,
1426 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1427 break;
1428
1429 default:
1430 VERIFY(0);
1431 /* NOTREACHED */
1432 __builtin_unreachable();
1433 }
1434
1435 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1436 m->m_flags |= M_BCAST;
1437 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1438 m->m_flags |= M_MCAST;
1439 }
1440 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1441 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1442 (t == NR_RX) ? "RX" : "TX",
1443 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1444 }
1445
1446 /*
1447 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1448 * NOTE: poff is the offset within the packet.
1449 *
1450 * start/stuff is relative to poff, within [0, len], such that
1451 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1452 */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
#pragma unused(moff) /* NOTE(review): moff IS used below; pragma looks stale — confirm */
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* running 32-bit partial checksum */
	uint16_t csum = 0;	/* folded 16-bit checksum (0 if not computed) */
	uint8_t *baddr;		/* first buflet data address + poff */
	uint8_t *dp;		/* destination pointer inside the mbuf */

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* source span may straddle multiple buflets */
	VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(ph)));

	/* destination must be a single mbuf with a packet header */
	ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
	m->m_data += moff;
	dp = (uint8_t *)m->m_data;
	VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
	    (uint32_t)mbuf_maxlen(m));
	/* extend the mbuf to cover the bytes about to be copied */
	m->m_len += len;
	m->m_pkthdr.len += len;
	VERIFY(m->m_len == m->m_pkthdr.len &&
	    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum(partial);

			/* hand the partial sum to the legacy stack */
			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
			m->m_pkthdr.csum_rx_start = 0;
			m->m_pkthdr.csum_rx_val = 0;
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum already stuffed; clear TX offload state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "  pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* preserve link-layer broadcast/multicast indication */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1608
1609 /*
1610 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1611 * Caller can provide an initial sum to be folded into the computed
1612 * sum. The accumulated partial sum (32-bit) is returned to caller;
1613 * caller is responsible for further reducing it to 16-bit if needed,
1614 * as well as to perform the final 1's complement on it.
1615 */
1616 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1617 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1618 boolean_t *odd_start)
1619 {
1620 boolean_t needs_swap, started_on_odd = FALSE;
1621 int off0 = off, len0 = len;
1622 struct mbuf *m0 = m;
1623 uint64_t sum, partial;
1624 unsigned count, odd;
1625 char *cp = vp;
1626
1627 if (__improbable(off < 0 || len < 0)) {
1628 panic("%s: invalid offset %d or len %d", __func__, off, len);
1629 /* NOTREACHED */
1630 __builtin_unreachable();
1631 }
1632
1633 while (off > 0) {
1634 if (__improbable(m == NULL)) {
1635 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1636 __func__, m0, off0, len0);
1637 /* NOTREACHED */
1638 __builtin_unreachable();
1639 }
1640 if (off < m->m_len) {
1641 break;
1642 }
1643 off -= m->m_len;
1644 m = m->m_next;
1645 }
1646
1647 if (odd_start) {
1648 started_on_odd = *odd_start;
1649 }
1650 sum = initial_sum;
1651
1652 for (; len > 0; m = m->m_next) {
1653 uint8_t *datap;
1654
1655 if (__improbable(m == NULL)) {
1656 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1657 __func__, m0, off0, len0);
1658 /* NOTREACHED */
1659 __builtin_unreachable();
1660 }
1661
1662 datap = mtod(m, uint8_t *) + off;
1663 count = m->m_len;
1664
1665 if (__improbable(count == 0)) {
1666 continue;
1667 }
1668
1669 count = MIN(count - off, (unsigned)len);
1670 partial = 0;
1671
1672 if ((uintptr_t)datap & 1) {
1673 /* Align on word boundary */
1674 started_on_odd = !started_on_odd;
1675 #if BYTE_ORDER == LITTLE_ENDIAN
1676 partial = *datap << 8;
1677 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1678 partial = *datap;
1679 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1680 *cp++ = *datap++;
1681 count -= 1;
1682 len -= 1;
1683 }
1684
1685 needs_swap = started_on_odd;
1686 odd = count & 1u;
1687 count -= odd;
1688
1689 if (count) {
1690 partial = __packet_copy_and_sum(datap,
1691 cp, count, (uint32_t)partial);
1692 datap += count;
1693 cp += count;
1694 len -= count;
1695 if (__improbable((partial & (3ULL << 62)) != 0)) {
1696 if (needs_swap) {
1697 partial = (partial << 8) +
1698 (partial >> 56);
1699 }
1700 sum += (partial >> 32);
1701 sum += (partial & 0xffffffff);
1702 partial = 0;
1703 }
1704 }
1705
1706 if (odd) {
1707 #if BYTE_ORDER == LITTLE_ENDIAN
1708 partial += *datap;
1709 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1710 partial += *datap << 8;
1711 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1712 *cp++ = *datap++;
1713 len -= 1;
1714 started_on_odd = !started_on_odd;
1715 }
1716 off = 0;
1717
1718 if (needs_swap) {
1719 partial = (partial << 8) + (partial >> 24);
1720 }
1721 sum += (partial >> 32) + (partial & 0xffffffff);
1722 /*
1723 * Reduce sum to allow potential byte swap
1724 * in the next iteration without carry.
1725 */
1726 sum = (sum >> 32) + (sum & 0xffffffff);
1727 }
1728
1729 if (odd_start) {
1730 *odd_start = started_on_odd;
1731 }
1732
1733 /* Final fold (reduce 64-bit to 32-bit) */
1734 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1735 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1736
1737 /* return 32-bit partial sum to caller */
1738 return (uint32_t)sum;
1739 }
1740
1741 #if DEBUG || DEVELOPMENT
1742 #define TRAILERS_MAX 16 /* max trailing bytes */
1743 #define TRAILERS_REGEN (64 * 1024) /* regeneration threshold */
1744 static uint8_t tb[TRAILERS_MAX]; /* random trailing bytes */
1745 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1746
1747 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1748 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1749 {
1750 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1751 uint32_t extra;
1752 uint8_t *baddr;
1753
1754 /* get buffer address from packet */
1755 MD_BUFLET_ADDR_ABS(pkt, baddr);
1756 ASSERT(baddr != NULL);
1757 ASSERT(len <= pkt->pkt_qum.qum_pp->pp_buflet_size);
1758
1759 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1760 if (extra == 0 || extra > sizeof(tb) ||
1761 (len + extra) > pkt->pkt_qum.qum_pp->pp_buflet_size) {
1762 return 0;
1763 }
1764
1765 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1766 if (regen++ == TRAILERS_REGEN) {
1767 read_frandom(&tb[0], sizeof(tb));
1768 regen = 0;
1769 }
1770
1771 bcopy(&tb[0], (baddr + len), extra);
1772
1773 /* recompute partial sum (also to exercise related logic) */
1774 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1775 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1776 ((len + extra) - start), 0);
1777 pkt->pkt_csum_rx_start_off = start;
1778
1779 return extra;
1780 }
1781
1782 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1783 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1784 {
1785 uint32_t extra;
1786
1787 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1788 if (extra == 0 || extra > sizeof(tb)) {
1789 return 0;
1790 }
1791
1792 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1793 return 0;
1794 }
1795
1796 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1797 if (regen++ == TRAILERS_REGEN) {
1798 read_frandom(&tb[0], sizeof(tb));
1799 regen = 0;
1800 }
1801
1802 /* recompute partial sum (also to exercise related logic) */
1803 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1804 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1805 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1806 m->m_pkthdr.csum_rx_start = start;
1807
1808 return extra;
1809 }
1810 #endif /* DEBUG || DEVELOPMENT */
1811
1812 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)1813 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
1814 uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
1815 {
1816 VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
1817 }
1818
1819 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint16_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1820 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1821 uint16_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1822 {
1823 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1824 }
1825
1826 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1827 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1828 uint16_t len, boolean_t do_cscum)
1829 {
1830 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1831 }
1832
1833 void
pkt_copy(void * src,void * dst,size_t len)1834 pkt_copy(void *src, void *dst, size_t len)
1835 {
1836 return _pkt_copy(src, dst, len);
1837 }
1838