1 /*
2 * Copyright (c) 2017-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
#if (DEVELOPMENT || DEBUG)
/* debug/development-only sysctl node: kern.skywalk.packet.* */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* DEVELOPMENT || DEBUG */
40
41
42 __attribute__((always_inline))
43 static inline void
_pkt_copy(void * src,void * dst,size_t len)44 _pkt_copy(void *src, void *dst, size_t len)
45 {
46 if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
47 switch (len) {
48 case 20: /* standard IPv4 header */
49 sk_copy64_20(src, dst);
50 return;
51
52 case 40: /* IPv6 header */
53 sk_copy64_40(src, dst);
54 return;
55
56 default:
57 if (IS_P2ALIGNED(len, 64)) {
58 sk_copy64_64x(src, dst, len);
59 return;
60 } else if (IS_P2ALIGNED(len, 32)) {
61 sk_copy64_32x(src, dst, len);
62 return;
63 } else if (IS_P2ALIGNED(len, 8)) {
64 sk_copy64_8x(src, dst, len);
65 return;
66 } else if (IS_P2ALIGNED(len, 4)) {
67 sk_copy64_4x(src, dst, len);
68 return;
69 }
70 break;
71 }
72 }
73 bcopy(src, dst, len);
74 }
75
76 /*
77 * This routine is used for copying data across two kernel packets.
78 * Can also optionally compute 16-bit partial inet checksum as the
79 * data is copied.
80 * This routine is used by flowswitch while copying packet from vp
81 * adapter pool to packet in native netif pool and vice-a-versa.
82 *
83 * start/stuff is relative to soff, within [0, len], such that
84 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85 */
/*
 * Single-buflet packet-to-packet copy, per direction:
 *
 *   NR_RX: record a partial checksum (folded to 16 bits, not yet
 *       complemented) in the destination metadata via
 *       __packet_set_inet_checksum(PACKET_CSUM_PARTIAL); the stack
 *       finishes it later.
 *
 *   NR_TX: compute the final checksum (folded and complemented) and
 *       write it directly into the destination buffer at 'stuff'.
 *
 * Bytes in [0, start) are copied without summing; [start, len) are
 * copied and summed in a single pass when 'copysum' is set.
 */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet copy: destination range must fit the first buffer */
	VERIFY((doff + len) <= dpkt->pkt_qum.qum_pp->pp_buflet_size);

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* folded but not complemented; stack completes it */
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
			/* no sum computed; clear RX checksum metadata */
			dpkt->pkt_csum_rx_start_off = 0;
			dpkt->pkt_csum_rx_value = 0;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    " pkt 0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			/* aligned store when possible; bcopy otherwise */
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* checksum handled above; clear TX offload metadata */
		dpkt->pkt_csum_flags = 0;
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* update buflet/packet length metadata for the bytes written */
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
198
199 /*
200 * NOTE: soff is the offset within the packet
201 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
202 * caller is responsible for further reducing it to 16-bit if needed,
203 * as well as to perform the final 1's complement on it.
204 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;		/* original length, kept for panic msg */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;	/* off0: panic msg */
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry over odd-byte state from the caller's previous segment */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		clen = (uint16_t)MIN(len, sblen);

		/*
		 * NOTE(review): this path neither consults nor updates
		 * *odd_start for byte swapping, and returns after summing
		 * at most sblen bytes even when len > sblen -- presumably
		 * callers never reach here in those states; confirm.
		 */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* fall through to the general loop */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
		soff = 0;	/* offset applies to the first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
			}
			/*
			 * If the stream so far ended on an odd byte, the
			 * bytes accumulated in 'partial' are shifted by
			 * one and must be swapped before folding into sum.
			 */
			needs_swap = started_on_odd;

			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* harvest 'partial' before it can overflow 32 bits */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		len -= clen + odd;
		ASSERT(sblen == 0 || len == 0);
	}

	/* report odd-byte state back for the caller's next segment */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
341
342 /*
343 * NOTE: Caller of this function is responsible to adjust the length and offset
344 * of the first buflet of the destination packet if (doff != 0),
345 * i.e. additional data is being prependend to the packet.
346 * It should also finalize the packet.
347 * To simplify & optimize the routine, we have also assumed that soff & doff
348 * will lie within the first buffer, which is true for the current use cases
349 * where, doff is the offset of the checksum field in the TCP/IP header and
350 * soff is the L3 offset.
351 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
352 * caller is responsible for further reducing it to 16-bit if needed,
353 * as well as to perform the final 1's complement on it.
354 */
355 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)356 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
357 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
358 {
359 uint8_t odd = 0;
360 uint32_t sum = 0, partial;
361 boolean_t needs_swap, started_on_odd = FALSE;
362 uint8_t *sbaddr = NULL, *dbaddr = NULL;
363 uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
364 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
365 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
366 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
367
368 ASSERT(csum_partial != NULL || !do_csum);
369 sbcnt = __packet_get_buflet_count(sph);
370 dbcnt = __packet_get_buflet_count(dph);
371
372 while (len != 0) {
373 ASSERT(sbaddr == NULL || dbaddr == NULL);
374 if (sbaddr == NULL) {
375 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
376 if (__improbable(sbuf == NULL)) {
377 break;
378 }
379 sbufp = sbuf;
380 sblen = __buflet_get_data_length(sbuf);
381 sboff = __buflet_get_data_offset(sbuf);
382 ASSERT(soff >= sboff);
383 ASSERT(sboff + sblen > soff);
384 sblen -= (soff - sboff);
385 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
386 soff = 0;
387 }
388
389 if (dbaddr == NULL) {
390 if (dbufp != NULL) {
391 __buflet_set_data_length(dbufp, dlen0);
392 }
393
394 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
395 if (__improbable(dbuf == NULL)) {
396 break;
397 }
398 dbufp = dbuf;
399 dlim = __buflet_get_data_limit(dbuf);
400 ASSERT(dlim > doff);
401 dlim -= doff;
402 if (doff != 0) {
403 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
404 }
405 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
406 dlen0 = dlim;
407 doff = 0;
408 }
409
410 clen = (uint16_t)MIN(len, sblen);
411 clen = MIN(clen, dlim);
412
413 if (__probable(do_csum)) {
414 partial = 0;
415 if (__improbable((uintptr_t)sbaddr & 1)) {
416 /* Align on word boundary */
417 started_on_odd = !started_on_odd;
418 #if BYTE_ORDER == LITTLE_ENDIAN
419 partial = (uint8_t)*sbaddr << 8;
420 #else /* BYTE_ORDER != LITTLE_ENDIAN */
421 partial = (uint8_t)*sbaddr;
422 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
423 *dbaddr++ = *sbaddr++;
424 clen -= 1;
425 dlim -= 1;
426 len -= 1;
427 }
428 needs_swap = started_on_odd;
429
430 odd = clen & 1u;
431 clen -= odd;
432
433 if (clen != 0) {
434 partial = __packet_copy_and_sum(sbaddr, dbaddr,
435 clen, partial);
436 }
437
438 if (__improbable(partial & 0xc0000000)) {
439 if (needs_swap) {
440 partial = (partial << 8) +
441 (partial >> 24);
442 }
443 sum += (partial >> 16);
444 sum += (partial & 0xffff);
445 partial = 0;
446 }
447 } else {
448 _pkt_copy(sbaddr, dbaddr, clen);
449 }
450 sbaddr += clen;
451 dbaddr += clen;
452
453 if (__probable(do_csum)) {
454 if (odd != 0) {
455 #if BYTE_ORDER == LITTLE_ENDIAN
456 partial += (uint8_t)*sbaddr;
457 #else /* BYTE_ORDER != LITTLE_ENDIAN */
458 partial += (uint8_t)*sbaddr << 8;
459 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
460 *dbaddr++ = *sbaddr++;
461 started_on_odd = !started_on_odd;
462 }
463
464 if (needs_swap) {
465 partial = (partial << 8) + (partial >> 24);
466 }
467 sum += (partial >> 16) + (partial & 0xffff);
468 /*
469 * Reduce sum to allow potential byte swap
470 * in the next iteration without carry.
471 */
472 sum = (sum >> 16) + (sum & 0xffff);
473 }
474
475 sblen -= clen + odd;
476 dlim -= clen + odd;
477 len -= clen + odd;
478
479 if (sblen == 0) {
480 sbaddr = NULL;
481 }
482
483 if (dlim == 0) {
484 dbaddr = NULL;
485 }
486 }
487
488 if (__probable(dbuf != NULL)) {
489 __buflet_set_data_length(dbuf, (dlen0 - dlim));
490 }
491 if (__probable(do_csum)) {
492 /* Final fold (reduce 32-bit to 16-bit) */
493 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
494 sum = (sum >> 16) + (sum & 0xffff);
495 *csum_partial = (uint32_t)sum;
496 }
497 return len == 0;
498 }
499
500 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)501 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
502 {
503 uint8_t odd = 0;
504 uint32_t sum = 0, partial;
505 boolean_t needs_swap, started_on_odd = FALSE;
506 uint8_t *sbaddr = NULL;
507 uint16_t clen, sblen, sbcnt, sboff;
508 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
509 kern_buflet_t sbuf = NULL, sbufp = NULL;
510
511 sbcnt = __packet_get_buflet_count(sph);
512
513 /* fastpath (single buflet, even aligned, even length) */
514 if (sbcnt == 1 && len != 0) {
515 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
516 ASSERT(sbuf != NULL);
517 sblen = __buflet_get_data_length(sbuf);
518 sboff = __buflet_get_data_offset(sbuf);
519 ASSERT(soff >= sboff);
520 ASSERT(sboff + sblen > soff);
521 sblen -= (soff - sboff);
522 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
523
524 clen = MIN(len, sblen);
525
526 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
527 sum = __packet_cksum(sbaddr, clen, 0);
528 return __packet_fold_sum(sum);
529 }
530
531 sbaddr = NULL;
532 sbuf = sbufp = NULL;
533 }
534
535 /* slowpath */
536 while (len != 0) {
537 ASSERT(sbaddr == NULL);
538 if (sbaddr == NULL) {
539 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
540 if (__improbable(sbuf == NULL)) {
541 break;
542 }
543 sbufp = sbuf;
544 sblen = __buflet_get_data_length(sbuf);
545 sboff = __buflet_get_data_offset(sbuf);
546 ASSERT(soff >= sboff);
547 ASSERT(sboff + sblen > soff);
548 sblen -= (soff - sboff);
549 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
550 soff = 0;
551 }
552
553 clen = MIN(len, sblen);
554
555 partial = 0;
556 if (__improbable((uintptr_t)sbaddr & 1)) {
557 /* Align on word boundary */
558 started_on_odd = !started_on_odd;
559 #if BYTE_ORDER == LITTLE_ENDIAN
560 partial = (uint8_t)*sbaddr << 8;
561 #else /* BYTE_ORDER != LITTLE_ENDIAN */
562 partial = (uint8_t)*sbaddr;
563 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
564 clen -= 1;
565 len -= 1;
566 }
567 needs_swap = started_on_odd;
568
569 odd = clen & 1u;
570 clen -= odd;
571
572 if (clen != 0) {
573 partial = __packet_cksum(sbaddr,
574 clen, partial);
575 }
576
577 if (__improbable(partial & 0xc0000000)) {
578 if (needs_swap) {
579 partial = (partial << 8) +
580 (partial >> 24);
581 }
582 sum += (partial >> 16);
583 sum += (partial & 0xffff);
584 partial = 0;
585 }
586 sbaddr += clen;
587
588 if (odd != 0) {
589 #if BYTE_ORDER == LITTLE_ENDIAN
590 partial += (uint8_t)*sbaddr;
591 #else /* BYTE_ORDER != LITTLE_ENDIAN */
592 partial += (uint8_t)*sbaddr << 8;
593 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
594 started_on_odd = !started_on_odd;
595 }
596
597 if (needs_swap) {
598 partial = (partial << 8) + (partial >> 24);
599 }
600 sum += (partial >> 16) + (partial & 0xffff);
601 /*
602 * Reduce sum to allow potential byte swap
603 * in the next iteration without carry.
604 */
605 sum = (sum >> 16) + (sum & 0xffff);
606
607 sblen -= clen + odd;
608 len -= clen + odd;
609
610 if (sblen == 0) {
611 sbaddr = NULL;
612 }
613 }
614
615 /* Final fold (reduce 32-bit to 16-bit) */
616 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
617 sum = (sum >> 16) + (sum & 0xffff);
618 return (uint32_t)sum;
619 }
620
621
622 /*
623 * This is a multi-buflet variant of pkt_copy_from_pkt().
624 *
625 * start/stuff is relative to soff, within [0, len], such that
626 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
627 */
/*
 * Multi-buflet packet-to-packet copy; same RX/TX semantics as
 * pkt_copy_from_pkt() but the destination may span its whole buflet
 * chain.  On TX the checksum is still assumed to land in the first
 * destination buffer (asserted below).
 */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);

	/* destination range may span the entire buflet chain */
	VERIFY((doff + len) <=
	    (dpkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(copysum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			csum = __packet_fold_sum(partial);
			/* folded but not complemented; stack completes it */
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			/* no sum computed; clear RX checksum metadata */
			dpkt->pkt_csum_rx_start_off = 0;
			dpkt->pkt_csum_rx_value = 0;
		}
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    dpkt->pkt_qum.qum_pp->pp_buflet_size);
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			/* aligned store when possible; bcopy otherwise */
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* checksum handled above; clear TX offload metadata */
		dpkt->pkt_csum_flags = 0;
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
731
732 /*
733 * This routine is used for copying an mbuf which originated in the host
734 * stack destined to a native skywalk interface (NR_TX), as well as for
735 * mbufs originating on compat network interfaces (NR_RX).
736 *
737 * start/stuff is relative to moff, within [0, len], such that
738 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
739 */
/*
 * Copy an mbuf chain into a (single-buflet) kernel packet; on TX also
 * translates mbuf packet-header metadata (flow, service class, policy,
 * completion callbacks) into the packet.
 */
void
pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet copy: destination range must fit the first buffer */
	VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);

	switch (t) {
	case NR_RX:
		pkt->pkt_csum_flags = 0;
		pkt->pkt_svc_class = m_get_service_class(m);
		if (__probable(copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the summed copy reads from mbuf
			 * offset 'start' rather than (moff + start);
			 * presumably moff is 0 whenever copysum is set --
			 * confirm against callers.
			 */
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum(partial);

			/* folded but not complemented; stack completes it */
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			m_copydata(m, moff, len, baddr);
			/* no sum computed; clear RX checksum metadata */
			pkt->pkt_csum_rx_start_off = 0;
			pkt->pkt_csum_rx_value = 0;
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			/* aligned store when possible; bcopy otherwise */
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
		} else {
			m_copydata(m, moff, len, baddr);
		}
		/* checksum handled above; clear TX offload metadata */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP; record both protocols */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* update buflet/packet length metadata for the bytes written */
	METADATA_ADJUST_LEN(pkt, len, poff);

	/* propagate the link-layer cast type from the mbuf */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
917
918 /*
919 * Like m_copydata_sum(), but works on a destination kernel packet.
920 */
static inline uint32_t
m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
    uint32_t len, boolean_t do_cscum)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = soff;	/* original offset, kept for panic msgs */
	uint32_t len0 = len;	/* original length, kept for panic msgs */
	struct mbuf *m0 = m;
	uint32_t sum = 0, partial;
	unsigned count0, count, odd, mlen_copied;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t dbuf = NULL, dbufp = NULL;

	/* skip ahead to the mbuf containing offset 'soff' */
	while (soff > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (soff < m->m_len) {
			break;
		}
		soff -= m->m_len;
		m = m->m_next;
	}

	if (__improbable(m == NULL)) {
		panic("%s: invalid mbuf chain %p [off %d, len %d]",
		    __func__, m0, off0, len0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	sbaddr = mtod(m, uint8_t *) + soff;
	count = m->m_len - soff;	/* bytes left in the current mbuf */
	mlen_copied = 0;

	while (len != 0) {
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		/* advance to the next mbuf once the current one is drained */
		if (sbaddr == NULL) {
			soff = 0;
			m = m->m_next;
			if (__improbable(m == NULL)) {
				panic("%s: invalid mbuf chain %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			sbaddr = mtod(m, uint8_t *);
			count = m->m_len;
			mlen_copied = 0;
		}

		/* zero-length mbufs are legal; just skip them */
		if (__improbable(count == 0)) {
			sbaddr = NULL;
			continue;
		}

		/* advance to the next destination buflet when full */
		if (dbaddr == NULL) {
			/* finalize the length of the buflet just filled */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				panic("%s: mbuf too large %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			dbufp = dbuf;
			dlim = __buflet_get_data_limit(dbuf) - doff;
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;
			doff = 0;	/* applies to the first buflet only */
		}

		/* count0: total bytes consumed this pass (incl. odd bytes) */
		count = MIN(count, (unsigned)len);
		count0 = count = MIN(count, dlim);

		if (!do_cscum) {
			_pkt_copy(sbaddr, dbaddr, count);
			sbaddr += count;
			dbaddr += count;
			goto skip_csum;
		}

		partial = 0;
		if ((uintptr_t)sbaddr & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			count -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			partial = __packet_copy_and_sum(sbaddr,
			    dbaddr, count, partial);
			sbaddr += count;
			dbaddr += count;
			/* harvest 'partial' before it can overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}

		if (odd) {
			/* fold in the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

skip_csum:
		/* count0 still covers the alignment/odd bytes copied above */
		dlim -= count0;
		len -= count0;
		mlen_copied += count0;

		if (dlim == 0) {
			dbaddr = NULL;
		}

		count = m->m_len - soff - mlen_copied;
		if (count == 0) {
			sbaddr = NULL;
		}
	}

	ASSERT(len == 0);
	/* finalize the length of the last destination buflet written */
	ASSERT(dbuf != NULL);
	__buflet_set_data_length(dbuf, (dlen0 - dlim));

	if (!do_cscum) {
		return 0;
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
1092
/*
 * This is a multi-buflet variant of pkt_copy_from_mbuf().
 *
 * start/stuff is relative to moff, within [0, len], such that
 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
 *
 * Copies "len" bytes at mbuf offset "moff" into the (possibly
 * multi-buflet) packet at offset "poff"; if "copysum" is set, the bytes
 * from "start" onward are checksummed as they are copied and the result
 * is recorded in the packet (RX) or stuffed into the payload (TX).
 */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* destination span must fit within the aggregate buflet space */
	VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		pkt->pkt_csum_flags = 0;
		if (__probable(copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/* account for the prefix copied outside the sum */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
			pkt->pkt_csum_rx_start_off = 0;
			pkt->pkt_csum_rx_value = 0;
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; copy byte-wise */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC is carried over UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-level broadcast/multicast classification */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1276
1277 /*
1278 * This routine is used for copying from a packet originating from a native
1279 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1280 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1281 *
1282 * Note that this routine does not alter m_data pointer of the mbuf, as the
1283 * caller may want to use the original value upon return. We do, however,
1284 * adjust the length to reflect the total data span.
1285 *
1286 * start/stuff is relative to poff, within [0, len], such that
1287 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1288 */
1289 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1290 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1291 struct mbuf *m, const uint16_t moff, const uint32_t len,
1292 const boolean_t copysum, const uint16_t start)
1293 {
1294 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1295 uint32_t partial;
1296 uint16_t csum = 0;
1297 uint8_t *baddr;
1298 uint8_t *dp;
1299
1300 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1301
1302 /* get buffer address from packet */
1303 MD_BUFLET_ADDR_ABS(pkt, baddr);
1304 ASSERT(baddr != NULL);
1305 baddr += poff;
1306 VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);
1307
1308 ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
1309 m->m_data += moff;
1310 dp = (uint8_t *)m->m_data;
1311 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1312 (uint32_t)mbuf_maxlen(m));
1313 m->m_len += len;
1314 m->m_pkthdr.len += len;
1315 VERIFY(m->m_len == m->m_pkthdr.len &&
1316 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1317
1318 switch (t) {
1319 case NR_RX:
1320 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1321 if (__probable(copysum)) {
1322 /*
1323 * Use pkt_copy() to copy the portion up to the
1324 * point where we need to start the checksum, and
1325 * copy the remainder, checksumming as we go.
1326 */
1327 if (__probable(start != 0)) {
1328 _pkt_copy(baddr, dp, start);
1329 }
1330 partial = __packet_copy_and_sum((baddr + start),
1331 (dp + start), (len - start), 0);
1332 csum = __packet_fold_sum(partial);
1333
1334 m->m_pkthdr.csum_flags |=
1335 (CSUM_DATA_VALID | CSUM_PARTIAL);
1336 m->m_pkthdr.csum_rx_start = start;
1337 m->m_pkthdr.csum_rx_val = csum;
1338 } else {
1339 _pkt_copy(baddr, dp, len);
1340 m->m_pkthdr.csum_rx_start = 0;
1341 m->m_pkthdr.csum_rx_val = 0;
1342 }
1343
1344 /* translate packet metadata */
1345 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1346 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1347
1348 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1349 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1350 sk_proc_name_address(current_proc()),
1351 sk_proc_pid(current_proc()), len,
1352 (copysum ? (len - start) : 0), csum, start);
1353 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1354 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1355 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1356 (uint32_t)m->m_pkthdr.csum_rx_start,
1357 (uint32_t)m->m_pkthdr.csum_rx_val);
1358 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1359 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1360 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1361 (uint32_t)pkt->pkt_csum_rx_start_off,
1362 (uint32_t)pkt->pkt_csum_rx_value);
1363 break;
1364
1365 case NR_TX:
1366 if (__probable(copysum)) {
1367 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1368 /*
1369 * Use pkt_copy() to copy the portion up to the
1370 * point where we need to start the checksum, and
1371 * copy the remainder, checksumming as we go.
1372 */
1373 if (__probable(start != 0)) {
1374 _pkt_copy(baddr, dp, start);
1375 }
1376 partial = __packet_copy_and_sum((baddr + start),
1377 (dp + start), (len - start), 0);
1378 csum = __packet_fold_sum_final(partial);
1379
1380 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1381 if (csum == 0 &&
1382 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1383 csum = 0xffff;
1384 }
1385
1386 /* Insert checksum into packet */
1387 ASSERT(stuff <= (len - sizeof(csum)));
1388 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1389 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1390 } else {
1391 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1392 }
1393 } else {
1394 _pkt_copy(baddr, dp, len);
1395 }
1396 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1397 m->m_pkthdr.csum_tx_start = 0;
1398 m->m_pkthdr.csum_tx_stuff = 0;
1399
1400 /* translate packet metadata */
1401 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1402 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1403 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1404 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1405 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1406 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1407 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1408 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1409 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1410 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1411 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1412 }
1413
1414 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1415 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1416 sk_proc_name_address(current_proc()),
1417 sk_proc_pid(current_proc()), len,
1418 (copysum ? (len - start) : 0), csum, start);
1419 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1420 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1421 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1422 (uint32_t)pkt->pkt_csum_tx_start_off,
1423 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1424 break;
1425
1426 default:
1427 VERIFY(0);
1428 /* NOTREACHED */
1429 __builtin_unreachable();
1430 }
1431
1432 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1433 m->m_flags |= M_BCAST;
1434 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1435 m->m_flags |= M_MCAST;
1436 }
1437 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1438 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1439 (t == NR_RX) ? "RX" : "TX",
1440 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1441 }
1442
1443 /*
1444 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1445 * NOTE: poff is the offset within the packet.
1446 *
1447 * start/stuff is relative to poff, within [0, len], such that
1448 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1449 */
1450 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1451 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1452 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1453 const uint32_t len, const boolean_t copysum, const uint16_t start)
1454 {
1455 #pragma unused(moff) /* may be PROC_NULL */
1456 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1457 uint32_t partial;
1458 uint16_t csum = 0;
1459 uint8_t *baddr;
1460 uint8_t *dp;
1461
1462 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1463
1464 /* get buffer address from packet */
1465 MD_BUFLET_ADDR_ABS(pkt, baddr);
1466 ASSERT(baddr != NULL);
1467 baddr += poff;
1468 VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
1469 __packet_get_buflet_count(ph)));
1470
1471 ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
1472 m->m_data += moff;
1473 dp = (uint8_t *)m->m_data;
1474 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1475 (uint32_t)mbuf_maxlen(m));
1476 m->m_len += len;
1477 m->m_pkthdr.len += len;
1478 VERIFY(m->m_len == m->m_pkthdr.len &&
1479 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1480
1481 switch (t) {
1482 case NR_RX:
1483 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1484 if (__probable(copysum)) {
1485 /*
1486 * Use pkt_copy() to copy the portion up to the
1487 * point where we need to start the checksum, and
1488 * copy the remainder, checksumming as we go.
1489 */
1490 if (__probable(start != 0)) {
1491 _pkt_copy(baddr, dp, start);
1492 }
1493 partial = _pkt_copyaddr_sum(ph, (poff + start),
1494 (dp + start), (len - start), TRUE, 0, NULL);
1495 csum = __packet_fold_sum(partial);
1496
1497 m->m_pkthdr.csum_flags |=
1498 (CSUM_DATA_VALID | CSUM_PARTIAL);
1499 m->m_pkthdr.csum_rx_start = start;
1500 m->m_pkthdr.csum_rx_val = csum;
1501 } else {
1502 (void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1503 m->m_pkthdr.csum_rx_start = 0;
1504 m->m_pkthdr.csum_rx_val = 0;
1505 }
1506
1507 /* translate packet metadata */
1508 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1509 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1510
1511 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1512 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1513 sk_proc_name_address(current_proc()),
1514 sk_proc_pid(current_proc()), len,
1515 (copysum ? (len - start) : 0), csum, start);
1516 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1517 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1518 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1519 (uint32_t)m->m_pkthdr.csum_rx_start,
1520 (uint32_t)m->m_pkthdr.csum_rx_val);
1521 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1522 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1523 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1524 (uint32_t)pkt->pkt_csum_rx_start_off,
1525 (uint32_t)pkt->pkt_csum_rx_value);
1526 break;
1527
1528 case NR_TX:
1529 if (__probable(copysum)) {
1530 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1531 /*
1532 * Use pkt_copy() to copy the portion up to the
1533 * point where we need to start the checksum, and
1534 * copy the remainder, checksumming as we go.
1535 */
1536 if (__probable(start != 0)) {
1537 _pkt_copy(baddr, dp, start);
1538 }
1539 partial = _pkt_copyaddr_sum(ph, (poff + start),
1540 (dp + start), (len - start), TRUE, 0, NULL);
1541 csum = __packet_fold_sum_final(partial);
1542
1543 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1544 if (csum == 0 &&
1545 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1546 csum = 0xffff;
1547 }
1548
1549 /* Insert checksum into packet */
1550 ASSERT(stuff <= (len - sizeof(csum)));
1551 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1552 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1553 } else {
1554 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1555 }
1556 } else {
1557 (void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1558 }
1559 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1560 m->m_pkthdr.csum_tx_start = 0;
1561 m->m_pkthdr.csum_tx_stuff = 0;
1562
1563 /* translate packet metadata */
1564 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1565 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1566 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1567 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1568 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1569 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1570 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1571 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1572 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1573 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1574 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1575 }
1576
1577 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1578 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1579 sk_proc_name_address(current_proc()),
1580 sk_proc_pid(current_proc()), len,
1581 (copysum ? (len - start) : 0), csum, start);
1582 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1583 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1584 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1585 (uint32_t)pkt->pkt_csum_tx_start_off,
1586 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1587 break;
1588
1589 default:
1590 VERIFY(0);
1591 /* NOTREACHED */
1592 __builtin_unreachable();
1593 }
1594
1595 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1596 m->m_flags |= M_BCAST;
1597 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1598 m->m_flags |= M_MCAST;
1599 }
1600 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1601 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1602 (t == NR_RX) ? "RX" : "TX",
1603 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1604 }
1605
1606 /*
1607 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1608 * Caller can provide an initial sum to be folded into the computed
1609 * sum. The accumulated partial sum (32-bit) is returned to caller;
1610 * caller is responsible for further reducing it to 16-bit if needed,
1611 * as well as to perform the final 1's complement on it.
1612 */
1613 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1614 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1615 boolean_t *odd_start)
1616 {
1617 boolean_t needs_swap, started_on_odd = FALSE;
1618 int off0 = off, len0 = len;
1619 struct mbuf *m0 = m;
1620 uint64_t sum, partial;
1621 unsigned count, odd;
1622 char *cp = vp;
1623
1624 if (__improbable(off < 0 || len < 0)) {
1625 panic("%s: invalid offset %d or len %d", __func__, off, len);
1626 /* NOTREACHED */
1627 __builtin_unreachable();
1628 }
1629
1630 while (off > 0) {
1631 if (__improbable(m == NULL)) {
1632 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1633 __func__, m0, off0, len0);
1634 /* NOTREACHED */
1635 __builtin_unreachable();
1636 }
1637 if (off < m->m_len) {
1638 break;
1639 }
1640 off -= m->m_len;
1641 m = m->m_next;
1642 }
1643
1644 if (odd_start) {
1645 started_on_odd = *odd_start;
1646 }
1647 sum = initial_sum;
1648
1649 for (; len > 0; m = m->m_next) {
1650 uint8_t *datap;
1651
1652 if (__improbable(m == NULL)) {
1653 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1654 __func__, m0, off0, len0);
1655 /* NOTREACHED */
1656 __builtin_unreachable();
1657 }
1658
1659 datap = mtod(m, uint8_t *) + off;
1660 count = m->m_len;
1661
1662 if (__improbable(count == 0)) {
1663 continue;
1664 }
1665
1666 count = MIN(count - off, (unsigned)len);
1667 partial = 0;
1668
1669 if ((uintptr_t)datap & 1) {
1670 /* Align on word boundary */
1671 started_on_odd = !started_on_odd;
1672 #if BYTE_ORDER == LITTLE_ENDIAN
1673 partial = *datap << 8;
1674 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1675 partial = *datap;
1676 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1677 *cp++ = *datap++;
1678 count -= 1;
1679 len -= 1;
1680 }
1681
1682 needs_swap = started_on_odd;
1683 odd = count & 1u;
1684 count -= odd;
1685
1686 if (count) {
1687 partial = __packet_copy_and_sum(datap,
1688 cp, count, (uint32_t)partial);
1689 datap += count;
1690 cp += count;
1691 len -= count;
1692 if (__improbable((partial & (3ULL << 62)) != 0)) {
1693 if (needs_swap) {
1694 partial = (partial << 8) +
1695 (partial >> 56);
1696 }
1697 sum += (partial >> 32);
1698 sum += (partial & 0xffffffff);
1699 partial = 0;
1700 }
1701 }
1702
1703 if (odd) {
1704 #if BYTE_ORDER == LITTLE_ENDIAN
1705 partial += *datap;
1706 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1707 partial += *datap << 8;
1708 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1709 *cp++ = *datap++;
1710 len -= 1;
1711 started_on_odd = !started_on_odd;
1712 }
1713 off = 0;
1714
1715 if (needs_swap) {
1716 partial = (partial << 8) + (partial >> 24);
1717 }
1718 sum += (partial >> 32) + (partial & 0xffffffff);
1719 /*
1720 * Reduce sum to allow potential byte swap
1721 * in the next iteration without carry.
1722 */
1723 sum = (sum >> 32) + (sum & 0xffffffff);
1724 }
1725
1726 if (odd_start) {
1727 *odd_start = started_on_odd;
1728 }
1729
1730 /* Final fold (reduce 64-bit to 32-bit) */
1731 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1732 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1733
1734 /* return 32-bit partial sum to caller */
1735 return (uint32_t)sum;
1736 }
1737
#if DEBUG || DEVELOPMENT
/* Test-only support for appending random trailing bytes to packets. */
#define TRAILERS_MAX    16              /* max trailing bytes appended */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold (calls) */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1743
1744 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1745 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1746 {
1747 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1748 uint32_t extra;
1749 uint8_t *baddr;
1750
1751 /* get buffer address from packet */
1752 MD_BUFLET_ADDR_ABS(pkt, baddr);
1753 ASSERT(baddr != NULL);
1754 ASSERT(len <= pkt->pkt_qum.qum_pp->pp_buflet_size);
1755
1756 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1757 if (extra == 0 || extra > sizeof(tb) ||
1758 (len + extra) > pkt->pkt_qum.qum_pp->pp_buflet_size) {
1759 return 0;
1760 }
1761
1762 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1763 if (regen++ == TRAILERS_REGEN) {
1764 read_frandom(&tb[0], sizeof(tb));
1765 regen = 0;
1766 }
1767
1768 bcopy(&tb[0], (baddr + len), extra);
1769
1770 /* recompute partial sum (also to exercise related logic) */
1771 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1772 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1773 ((len + extra) - start), 0);
1774 pkt->pkt_csum_rx_start_off = start;
1775
1776 return extra;
1777 }
1778
1779 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1780 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1781 {
1782 uint32_t extra;
1783
1784 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1785 if (extra == 0 || extra > sizeof(tb)) {
1786 return 0;
1787 }
1788
1789 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1790 return 0;
1791 }
1792
1793 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1794 if (regen++ == TRAILERS_REGEN) {
1795 read_frandom(&tb[0], sizeof(tb));
1796 regen = 0;
1797 }
1798
1799 /* recompute partial sum (also to exercise related logic) */
1800 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1801 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1802 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1803 m->m_pkthdr.csum_rx_start = start;
1804
1805 return extra;
1806 }
1807 #endif /* DEBUG || DEVELOPMENT */
1808
/*
 * Exported wrapper around _pkt_copypkt_sum(): copy (and optionally
 * checksum) "len" bytes between two packets.  Asserts, via VERIFY,
 * that the underlying copy succeeded.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	/* keep the call inside VERIFY so its text appears in any panic */
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
1815
1816 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint16_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1817 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1818 uint16_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1819 {
1820 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1821 }
1822
1823 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1824 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1825 uint16_t len, boolean_t do_cscum)
1826 {
1827 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1828 }
1829
/*
 * Exported wrapper around _pkt_copy(): copy "len" bytes from src to dst.
 */
void
pkt_copy(void *src, void *dst, size_t len)
{
	/*
	 * The original wrote "return _pkt_copy(...)"; a return statement
	 * with an expression is invalid in a void function in ISO C
	 * (C11 6.8.6.4p1) and only compiles as an extension.
	 */
	_pkt_copy(src, dst, len);
}
1835