1 /*
2 * Copyright (c) 2017-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
#if (DEVELOPMENT || DEBUG)
/* Parent sysctl node: kern.skywalk.packet (development/debug builds only) */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0; /* for testing trailing bytes */
/*
 * kern.skywalk.packet.trailers: non-zero enables trailing-byte testing;
 * presumably consulted by the packet copy paths — confirm at usage sites.
 */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
40
41
42 __attribute__((always_inline))
43 static inline void
_pkt_copy(void * src,void * dst,size_t len)44 _pkt_copy(void *src, void *dst, size_t len)
45 {
46 if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
47 switch (len) {
48 case 20: /* standard IPv4 header */
49 sk_copy64_20(src, dst);
50 return;
51
52 case 40: /* IPv6 header */
53 sk_copy64_40(src, dst);
54 return;
55
56 default:
57 if (IS_P2ALIGNED(len, 64)) {
58 sk_copy64_64x(src, dst, len);
59 return;
60 } else if (IS_P2ALIGNED(len, 32)) {
61 sk_copy64_32x(src, dst, len);
62 return;
63 } else if (IS_P2ALIGNED(len, 8)) {
64 sk_copy64_8x(src, dst, len);
65 return;
66 } else if (IS_P2ALIGNED(len, 4)) {
67 sk_copy64_4x(src, dst, len);
68 return;
69 }
70 break;
71 }
72 }
73 bcopy(src, dst, len);
74 }
75
76 /*
77 * This routine is used for copying data across two kernel packets.
78 * Can also optionally compute 16-bit partial inet checksum as the
79 * data is copied.
80 * This routine is used by flowswitch while copying packet from vp
81 * adapter pool to packet in native netif pool and vice-a-versa.
82 *
83 * start/stuff is relative to soff, within [0, len], such that
84 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85 */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;		/* running 32-bit 1's complement sum */
	uint16_t csum = 0;		/* folded 16-bit checksum */
	uint8_t *sbaddr, *dbaddr;
	/*
	 * Only compute the checksum ourselves when the caller asked for it
	 * AND the source packet doesn't already carry a full checksum.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet routine: whole copy must fit in the default buffer */
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* record partial checksum starting at "start" */
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* plain copy; propagate the source's RX csum state */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    " pkt 0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: byte-wise store */
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* TX: only TSO-related flags survive the copy */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* grow destination metadata length to cover the copied bytes */
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
200
201 /*
202 * NOTE: soff is the offset within the packet
203 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
204 * caller is responsible for further reducing it to 16-bit if needed,
205 * as well as to perform the final 1's complement on it.
206 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;		/* 1 if this chunk has a trailing odd byte */
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;		/* saved for panic diagnostics */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry the odd/even byte parity across calls, if requested */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		/*
		 * NOTE(review): if len > sblen this copies only clen bytes
		 * and returns; presumably callers guarantee the requested
		 * span fits in the single buflet — confirm at call sites.
		 */
		clen = (uint16_t)MIN(len, sblen);

		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* fall through to the generic path */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	/* slowpath: walk the buflet chain byte-parity-aware */
	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
		soff = 0;	/* offset applies to the first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
			}
			needs_swap = started_on_odd;

			/* sum the even-sized middle; odd byte handled below */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* reduce early so the byte swap below can't carry out */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* trailing odd byte: lane depends on endianness */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		len -= clen + odd;
		ASSERT(sblen == 0 || len == 0);
	}

	/* report final parity back to the caller */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
343
344 /*
345 * NOTE: Caller of this function is responsible to adjust the length and offset
346 * of the first buflet of the destination packet if (doff != 0),
347 * i.e. additional data is being prependend to the packet.
348 * It should also finalize the packet.
349 * To simplify & optimize the routine, we have also assumed that soff & doff
350 * will lie within the first buffer, which is true for the current use cases
351 * where, doff is the offset of the checksum field in the TCP/IP header and
352 * soff is the L3 offset.
353 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
354 * caller is responsible for further reducing it to 16-bit if needed,
355 * as well as to perform the final 1's complement on it.
356 */
357 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)358 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
359 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
360 {
361 uint8_t odd = 0;
362 uint32_t sum = 0, partial;
363 boolean_t needs_swap, started_on_odd = FALSE;
364 uint8_t *sbaddr = NULL, *dbaddr = NULL;
365 uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
366 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
367 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
368 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
369
370 ASSERT(csum_partial != NULL || !do_csum);
371 sbcnt = __packet_get_buflet_count(sph);
372 dbcnt = __packet_get_buflet_count(dph);
373
374 while (len != 0) {
375 ASSERT(sbaddr == NULL || dbaddr == NULL);
376 if (sbaddr == NULL) {
377 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
378 if (__improbable(sbuf == NULL)) {
379 break;
380 }
381 sbufp = sbuf;
382 sblen = __buflet_get_data_length(sbuf);
383 sboff = __buflet_get_data_offset(sbuf);
384 ASSERT(soff >= sboff);
385 ASSERT(sboff + sblen > soff);
386 sblen -= (soff - sboff);
387 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
388 soff = 0;
389 }
390
391 if (dbaddr == NULL) {
392 if (dbufp != NULL) {
393 __buflet_set_data_length(dbufp, dlen0);
394 }
395
396 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
397 if (__improbable(dbuf == NULL)) {
398 break;
399 }
400 dbufp = dbuf;
401 dlim = __buflet_get_data_limit(dbuf);
402 ASSERT(dlim > doff);
403 dlim -= doff;
404 if (doff != 0) {
405 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
406 }
407 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
408 dlen0 = dlim;
409 doff = 0;
410 }
411
412 clen = (uint16_t)MIN(len, sblen);
413 clen = MIN(clen, dlim);
414
415 if (__probable(do_csum)) {
416 partial = 0;
417 if (__improbable((uintptr_t)sbaddr & 1)) {
418 /* Align on word boundary */
419 started_on_odd = !started_on_odd;
420 #if BYTE_ORDER == LITTLE_ENDIAN
421 partial = (uint8_t)*sbaddr << 8;
422 #else /* BYTE_ORDER != LITTLE_ENDIAN */
423 partial = (uint8_t)*sbaddr;
424 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
425 *dbaddr++ = *sbaddr++;
426 clen -= 1;
427 dlim -= 1;
428 len -= 1;
429 }
430 needs_swap = started_on_odd;
431
432 odd = clen & 1u;
433 clen -= odd;
434
435 if (clen != 0) {
436 partial = __packet_copy_and_sum(sbaddr, dbaddr,
437 clen, partial);
438 }
439
440 if (__improbable(partial & 0xc0000000)) {
441 if (needs_swap) {
442 partial = (partial << 8) +
443 (partial >> 24);
444 }
445 sum += (partial >> 16);
446 sum += (partial & 0xffff);
447 partial = 0;
448 }
449 } else {
450 _pkt_copy(sbaddr, dbaddr, clen);
451 }
452 sbaddr += clen;
453 dbaddr += clen;
454
455 if (__probable(do_csum)) {
456 if (odd != 0) {
457 #if BYTE_ORDER == LITTLE_ENDIAN
458 partial += (uint8_t)*sbaddr;
459 #else /* BYTE_ORDER != LITTLE_ENDIAN */
460 partial += (uint8_t)*sbaddr << 8;
461 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
462 *dbaddr++ = *sbaddr++;
463 started_on_odd = !started_on_odd;
464 }
465
466 if (needs_swap) {
467 partial = (partial << 8) + (partial >> 24);
468 }
469 sum += (partial >> 16) + (partial & 0xffff);
470 /*
471 * Reduce sum to allow potential byte swap
472 * in the next iteration without carry.
473 */
474 sum = (sum >> 16) + (sum & 0xffff);
475 }
476
477 sblen -= clen + odd;
478 dlim -= clen + odd;
479 len -= clen + odd;
480
481 if (sblen == 0) {
482 sbaddr = NULL;
483 }
484
485 if (dlim == 0) {
486 dbaddr = NULL;
487 }
488 }
489
490 if (__probable(dbuf != NULL)) {
491 __buflet_set_data_length(dbuf, (dlen0 - dlim));
492 }
493 if (__probable(do_csum)) {
494 /* Final fold (reduce 32-bit to 16-bit) */
495 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
496 sum = (sum >> 16) + (sum & 0xffff);
497 *csum_partial = (uint32_t)sum;
498 }
499 return len == 0;
500 }
501
502 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)503 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
504 {
505 uint8_t odd = 0;
506 uint32_t sum = 0, partial;
507 boolean_t needs_swap, started_on_odd = FALSE;
508 uint8_t *sbaddr = NULL;
509 uint16_t clen, sblen, sbcnt, sboff;
510 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
511 kern_buflet_t sbuf = NULL, sbufp = NULL;
512
513 sbcnt = __packet_get_buflet_count(sph);
514
515 /* fastpath (single buflet, even aligned, even length) */
516 if (sbcnt == 1 && len != 0) {
517 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
518 ASSERT(sbuf != NULL);
519 sblen = __buflet_get_data_length(sbuf);
520 sboff = __buflet_get_data_offset(sbuf);
521 ASSERT(soff >= sboff);
522 ASSERT(sboff + sblen > soff);
523 sblen -= (soff - sboff);
524 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
525
526 clen = MIN(len, sblen);
527
528 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
529 sum = __packet_cksum(sbaddr, clen, 0);
530 return __packet_fold_sum(sum);
531 }
532
533 sbaddr = NULL;
534 sbuf = sbufp = NULL;
535 }
536
537 /* slowpath */
538 while (len != 0) {
539 ASSERT(sbaddr == NULL);
540 if (sbaddr == NULL) {
541 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
542 if (__improbable(sbuf == NULL)) {
543 break;
544 }
545 sbufp = sbuf;
546 sblen = __buflet_get_data_length(sbuf);
547 sboff = __buflet_get_data_offset(sbuf);
548 ASSERT(soff >= sboff);
549 ASSERT(sboff + sblen > soff);
550 sblen -= (soff - sboff);
551 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
552 soff = 0;
553 }
554
555 clen = MIN(len, sblen);
556
557 partial = 0;
558 if (__improbable((uintptr_t)sbaddr & 1)) {
559 /* Align on word boundary */
560 started_on_odd = !started_on_odd;
561 #if BYTE_ORDER == LITTLE_ENDIAN
562 partial = (uint8_t)*sbaddr << 8;
563 #else /* BYTE_ORDER != LITTLE_ENDIAN */
564 partial = (uint8_t)*sbaddr;
565 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
566 clen -= 1;
567 len -= 1;
568 }
569 needs_swap = started_on_odd;
570
571 odd = clen & 1u;
572 clen -= odd;
573
574 if (clen != 0) {
575 partial = __packet_cksum(sbaddr,
576 clen, partial);
577 }
578
579 if (__improbable(partial & 0xc0000000)) {
580 if (needs_swap) {
581 partial = (partial << 8) +
582 (partial >> 24);
583 }
584 sum += (partial >> 16);
585 sum += (partial & 0xffff);
586 partial = 0;
587 }
588 sbaddr += clen;
589
590 if (odd != 0) {
591 #if BYTE_ORDER == LITTLE_ENDIAN
592 partial += (uint8_t)*sbaddr;
593 #else /* BYTE_ORDER != LITTLE_ENDIAN */
594 partial += (uint8_t)*sbaddr << 8;
595 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
596 started_on_odd = !started_on_odd;
597 }
598
599 if (needs_swap) {
600 partial = (partial << 8) + (partial >> 24);
601 }
602 sum += (partial >> 16) + (partial & 0xffff);
603 /*
604 * Reduce sum to allow potential byte swap
605 * in the next iteration without carry.
606 */
607 sum = (sum >> 16) + (sum & 0xffff);
608
609 sblen -= clen + odd;
610 len -= clen + odd;
611
612 if (sblen == 0) {
613 sbaddr = NULL;
614 }
615 }
616
617 /* Final fold (reduce 32-bit to 16-bit) */
618 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
619 sum = (sum >> 16) + (sum & 0xffff);
620 return (uint32_t)sum;
621 }
622
623
624 /*
625 * This is a multi-buflet variant of pkt_copy_from_pkt().
626 *
627 * start/stuff is relative to soff, within [0, len], such that
628 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
629 */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;	/* running sum from _pkt_copypkt_sum() */
	uint16_t csum = 0;	/* folded 16-bit checksum */
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	/* only checksum if the source lacks a full checksum already */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	/* the copy must fit across all of the destination's buflets */
	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * NOTE(review): adjusts metadata by "start" only;
			 * presumably the checksummed remainder is accounted
			 * for by the buflet data lengths _pkt_copypkt_sum()
			 * sets — confirm against METADATA_ADJUST_LEN().
			 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			/* propagate the source's RX checksum state */
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: byte-wise store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			/* see NOTE(review) in the NR_RX case above */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* TX: only TSO-related flags survive the copy */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
735
736 /*
737 * This routine is used for copying an mbuf which originated in the host
738 * stack destined to a native skywalk interface (NR_TX), as well as for
739 * mbufs originating on compat network interfaces (NR_RX).
740 *
741 * start/stuff is relative to moff, within [0, len], such that
742 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
743 */
void
pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* running 32-bit 1's complement sum */
	uint16_t csum = 0;	/* folded 16-bit checksum */
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet routine: whole copy must fit in the default buffer */
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		/* carry over the mbuf's RX checksum state */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/* checksum ourselves unless the mbuf has a full RX csum */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the sum begins at absolute mbuf
			 * offset "start", not (moff + start); presumably
			 * callers pass moff == 0 whenever copysum is set —
			 * confirm at call sites.
			 */
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			m_copydata(m, moff, len, baddr);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/* see NOTE(review) in the NR_RX case about offsets */
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: byte-wise store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
		} else {
			m_copydata(m, moff, len, baddr);
		}
		/* TX: checksum already inserted above (or not requested) */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* translate TSO flags; IPv4 and IPv6 are mutually exclusive */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		/* carry NECP policy info over to the packet */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* grow packet metadata length to cover the copied bytes */
	METADATA_ADJUST_LEN(pkt, len, poff);

	/* preserve link-layer broadcast/multicast classification */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
936
937 /*
938 * Like m_copydata_sum(), but works on a destination kernel packet.
939 */
940 static inline uint32_t
m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
    uint32_t len, boolean_t do_cscum)
{
	/*
	 * Copy "len" bytes starting at offset "soff" of mbuf chain "m"
	 * into the (possibly multi-buflet) destination packet "dph" at
	 * offset "doff".  When "do_cscum" is TRUE, also accumulate a
	 * 16-bit one's complement sum over the copied bytes and return
	 * the folded (but not inverted) result; otherwise return 0.
	 * Panics if the mbuf chain is shorter than soff+len, or if the
	 * destination packet runs out of buflet space.
	 */
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = soff;		/* saved for diagnostics only */
	uint32_t len0 = len;		/* saved for diagnostics only */
	struct mbuf *m0 = m;		/* saved for diagnostics only */
	uint32_t sum = 0, partial;	/* running and per-pass partial sums */
	unsigned count0, count, odd, mlen_copied;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;	/* src/dst cursors; NULL => reload */
	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t dbuf = NULL, dbufp = NULL;

	/* advance to the mbuf that contains the starting offset */
	while (soff > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (soff < m->m_len) {
			break;
		}
		soff -= m->m_len;
		m = m->m_next;
	}

	if (__improbable(m == NULL)) {
		panic("%s: invalid mbuf chain %p [off %d, len %d]",
		    __func__, m0, off0, len0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	sbaddr = mtod(m, uint8_t *) + soff;
	count = m->m_len - soff;	/* bytes left in the current mbuf */
	mlen_copied = 0;		/* bytes consumed from current mbuf */

	while (len != 0) {
		/* at most one side needs reloading per pass */
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		if (sbaddr == NULL) {
			/* current mbuf drained; move to the next one */
			soff = 0;
			m = m->m_next;
			if (__improbable(m == NULL)) {
				panic("%s: invalid mbuf chain %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			sbaddr = mtod(m, uint8_t *);
			count = m->m_len;
			mlen_copied = 0;
		}

		if (__improbable(count == 0)) {
			/* zero-length mbuf; skip it */
			sbaddr = NULL;
			continue;
		}

		if (dbaddr == NULL) {
			/*
			 * Current destination buflet is full (or this is
			 * the first pass); record the previous buflet's
			 * final length and advance to the next buflet.
			 */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				panic("%s: mbuf too large %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			dbufp = dbuf;
			/* "doff" applies to the first buflet only */
			dlim = __buflet_get_data_limit(dbuf) - doff;
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;
			doff = 0;
		}

		/* copy no more than what remains and what fits */
		count = MIN(count, (unsigned)len);
		count0 = count = MIN(count, dlim);

		if (!do_cscum) {
			_pkt_copy(sbaddr, dbaddr, count);
			sbaddr += count;
			dbaddr += count;
			goto skip_csum;
		}

		partial = 0;
		if ((uintptr_t)sbaddr & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			count -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			/* bulk copy+sum over the word-aligned middle */
			partial = __packet_copy_and_sum(sbaddr,
			    dbaddr, count, partial);
			sbaddr += count;
			dbaddr += count;
			/* fold early if the accumulator nears overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}

		if (odd) {
			/* trailing odd byte of this span */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* byte-rotate to compensate for odd alignment */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

skip_csum:
		dlim -= count0;
		len -= count0;
		mlen_copied += count0;

		if (dlim == 0) {
			/* destination buflet is full; reload next pass */
			dbaddr = NULL;
		}

		count = m->m_len - soff - mlen_copied;
		if (count == 0) {
			/* source mbuf is drained; reload next pass */
			sbaddr = NULL;
		}
	}

	ASSERT(len == 0);
	ASSERT(dbuf != NULL);
	/* record the final length of the last buflet written */
	__buflet_set_data_length(dbuf, (dlen0 - dlim));

	if (!do_cscum) {
		return 0;
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
1111
1112 /*
1113 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1114 *
1115 * start/stuff is relative to moff, within [0, len], such that
1116 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1117 */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* accumulated partial one's complement sum */
	uint16_t csum = 0;	/* folded checksum (for logging/insertion) */
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* copy must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		/* inherit RX checksum state from the source mbuf */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the summed copy begins at mbuf
			 * offset "start", not "moff + start"; presumably
			 * callers pass moff == 0 on this path — confirm.
			 */
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/* account for the "start" bytes copied above */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			/* full checksum already present; plain copy */
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/* offset where the computed checksum is stored */
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum has been resolved above; clear offload state */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO markers (mutually exclusive v4/v6) */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		/* carry over NECP policy attribution */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "  mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type from the mbuf */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1311
1312 /*
1313 * This routine is used for copying from a packet originating from a native
1314 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1315 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1316 *
1317 * Note that this routine does not alter m_data pointer of the mbuf, as the
1318 * caller may want to use the original value upon return. We do, however,
1319 * adjust the length to reflect the total data span.
1320 *
1321 * This routine supports copying into an mbuf chain for RX but not TX.
1322 *
1323 * start/stuff is relative to poff, within [0, len], such that
1324 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1325 */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;		/* cursor over the RX mbuf chain */
	uint32_t partial = 0;		/* accumulated partial sum */
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;		/* folded checksum */
	uint8_t *baddr;			/* source: packet buffer + poff */
	uint8_t *dp;			/* destination cursor in the mbuf */
	/* only sum when caller asks AND no full checksum is present */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	ASSERT((m->m_flags & M_PKTHDR));
	/*
	 * NOTE(review): this advances m_data by moff and does not restore
	 * it, which appears to contradict the "does not alter m_data"
	 * note in the comment above this function — confirm intent.
	 */
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/*
		 * Use pkt_copy() to copy the portion up to the
		 * point where we need to start the checksum, and
		 * copy the remainder, checksumming as we go.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* copy (and optionally sum) the rest across the chain */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
			if (__probable(do_sum)) {
				partial = __packet_copy_and_sum((baddr + copied_len),
				    dp, tmp_len, partial);
			} else {
				_pkt_copy((baddr + copied_len), dp, tmp_len);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* the chain must have had room for the whole payload */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* advertise the partial sum we just computed */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* pass through the packet's existing RX csum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		dp = (uint8_t *)m->m_data;
		/* TX supports only a single mbuf, not a chain */
		ASSERT(m->m_next == NULL);

		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

		if (__probable(copysum)) {
			/* offset where the computed checksum is stored */
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		/* checksum resolved above; clear offload request state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}
		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "  pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type to the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1521
1522 /*
1523 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1524 * NOTE: poff is the offset within the packet.
1525 *
1526 * This routine supports copying into an mbuf chain for RX but not TX.
1527 *
1528 * start/stuff is relative to poff, within [0, len], such that
1529 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1530 */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;		/* cursor over the RX mbuf chain */
	uint32_t partial = 0;		/* accumulated partial sum */
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;		/* folded checksum */
	uint8_t *baddr;			/* first buflet's buffer + poff */
	uint8_t *dp;			/* destination cursor in the mbuf */
	/* only sum when caller asks AND no full checksum is present */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* copy must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	ASSERT((m->m_flags & M_PKTHDR));
	/* advance to the caller-requested offset within the mbuf */
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		/*
		 * Copy the pre-checksum head straight from the first
		 * buflet; the summed portion below walks buflets via
		 * the packet-offset based copy routines.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			/* source offset within the (multi-buflet) packet */
			uint16_t soff = poff + (uint16_t)copied_len;
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;

			if (__probable(do_sum)) {
				partial = _pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, TRUE, partial, NULL);
			} else {
				pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, FALSE, 0, NULL);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* the chain must have had room for the whole payload */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* advertise the partial sum we just computed */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* pass through the packet's existing RX csum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "  pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;
	case NR_TX:
		dp = (uint8_t *)m->m_data;
		/* TX supports only a single mbuf, not a chain */
		ASSERT(m->m_next == NULL);
		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
		if (__probable(copysum)) {
			/* offset where the computed checksum is stored */
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum resolved above; clear offload request state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}
		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "  pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type to the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1721
1722 /*
1723 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1724 * Caller can provide an initial sum to be folded into the computed
1725 * sum. The accumulated partial sum (32-bit) is returned to caller;
1726 * caller is responsible for further reducing it to 16-bit if needed,
1727 * as well as to perform the final 1's complement on it.
1728 */
1729 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1730 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1731 boolean_t *odd_start)
1732 {
1733 boolean_t needs_swap, started_on_odd = FALSE;
1734 int off0 = off, len0 = len;
1735 struct mbuf *m0 = m;
1736 uint64_t sum, partial;
1737 unsigned count, odd;
1738 char *cp = vp;
1739
1740 if (__improbable(off < 0 || len < 0)) {
1741 panic("%s: invalid offset %d or len %d", __func__, off, len);
1742 /* NOTREACHED */
1743 __builtin_unreachable();
1744 }
1745
1746 while (off > 0) {
1747 if (__improbable(m == NULL)) {
1748 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1749 __func__, m0, off0, len0);
1750 /* NOTREACHED */
1751 __builtin_unreachable();
1752 }
1753 if (off < m->m_len) {
1754 break;
1755 }
1756 off -= m->m_len;
1757 m = m->m_next;
1758 }
1759
1760 if (odd_start) {
1761 started_on_odd = *odd_start;
1762 }
1763 sum = initial_sum;
1764
1765 for (; len > 0; m = m->m_next) {
1766 uint8_t *datap;
1767
1768 if (__improbable(m == NULL)) {
1769 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1770 __func__, m0, off0, len0);
1771 /* NOTREACHED */
1772 __builtin_unreachable();
1773 }
1774
1775 datap = mtod(m, uint8_t *) + off;
1776 count = m->m_len;
1777
1778 if (__improbable(count == 0)) {
1779 continue;
1780 }
1781
1782 count = MIN(count - off, (unsigned)len);
1783 partial = 0;
1784
1785 if ((uintptr_t)datap & 1) {
1786 /* Align on word boundary */
1787 started_on_odd = !started_on_odd;
1788 #if BYTE_ORDER == LITTLE_ENDIAN
1789 partial = *datap << 8;
1790 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1791 partial = *datap;
1792 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1793 *cp++ = *datap++;
1794 count -= 1;
1795 len -= 1;
1796 }
1797
1798 needs_swap = started_on_odd;
1799 odd = count & 1u;
1800 count -= odd;
1801
1802 if (count) {
1803 partial = __packet_copy_and_sum(datap,
1804 cp, count, (uint32_t)partial);
1805 datap += count;
1806 cp += count;
1807 len -= count;
1808 if (__improbable((partial & (3ULL << 62)) != 0)) {
1809 if (needs_swap) {
1810 partial = (partial << 8) +
1811 (partial >> 56);
1812 }
1813 sum += (partial >> 32);
1814 sum += (partial & 0xffffffff);
1815 partial = 0;
1816 }
1817 }
1818
1819 if (odd) {
1820 #if BYTE_ORDER == LITTLE_ENDIAN
1821 partial += *datap;
1822 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1823 partial += *datap << 8;
1824 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1825 *cp++ = *datap++;
1826 len -= 1;
1827 started_on_odd = !started_on_odd;
1828 }
1829 off = 0;
1830
1831 if (needs_swap) {
1832 partial = (partial << 8) + (partial >> 24);
1833 }
1834 sum += (partial >> 32) + (partial & 0xffffffff);
1835 /*
1836 * Reduce sum to allow potential byte swap
1837 * in the next iteration without carry.
1838 */
1839 sum = (sum >> 32) + (sum & 0xffffffff);
1840 }
1841
1842 if (odd_start) {
1843 *odd_start = started_on_odd;
1844 }
1845
1846 /* Final fold (reduce 64-bit to 32-bit) */
1847 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1848 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1849
1850 /* return 32-bit partial sum to caller */
1851 return (uint32_t)sum;
1852 }
1853
1854 #if DEBUG || DEVELOPMENT
1855 #define TRAILERS_MAX 16 /* max trailing bytes */
1856 #define TRAILERS_REGEN (64 * 1024) /* regeneration threshold */
1857 static uint8_t tb[TRAILERS_MAX]; /* random trailing bytes */
1858 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1859
1860 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1861 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1862 {
1863 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1864 uint32_t extra;
1865 uint8_t *baddr;
1866
1867 /* get buffer address from packet */
1868 MD_BUFLET_ADDR_ABS(pkt, baddr);
1869 ASSERT(baddr != NULL);
1870 ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1871
1872 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1873 if (extra == 0 || extra > sizeof(tb) ||
1874 (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1875 return 0;
1876 }
1877
1878 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1879 if (regen++ == TRAILERS_REGEN) {
1880 read_frandom(&tb[0], sizeof(tb));
1881 regen = 0;
1882 }
1883
1884 bcopy(&tb[0], (baddr + len), extra);
1885
1886 /* recompute partial sum (also to exercise related logic) */
1887 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1888 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1889 ((len + extra) - start), 0);
1890 pkt->pkt_csum_rx_start_off = start;
1891
1892 return extra;
1893 }
1894
1895 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1896 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1897 {
1898 uint32_t extra;
1899
1900 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1901 if (extra == 0 || extra > sizeof(tb)) {
1902 return 0;
1903 }
1904
1905 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1906 return 0;
1907 }
1908
1909 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1910 if (regen++ == TRAILERS_REGEN) {
1911 read_frandom(&tb[0], sizeof(tb));
1912 regen = 0;
1913 }
1914
1915 /* recompute partial sum (also to exercise related logic) */
1916 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1917 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1918 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1919 m->m_pkthdr.csum_rx_start = start;
1920
1921 return extra;
1922 }
1923 #endif /* DEBUG || DEVELOPMENT */
1924
1925 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)1926 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
1927 uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
1928 {
1929 VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
1930 }
1931
1932 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1933 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1934 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1935 {
1936 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1937 }
1938
1939 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1940 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1941 uint16_t len, boolean_t do_cscum)
1942 {
1943 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1944 }
1945
1946 void
pkt_copy(void * src,void * dst,size_t len)1947 pkt_copy(void *src, void *dst, size_t len)
1948 {
1949 return _pkt_copy(src, dst, len);
1950 }
1951