1 /*
2 * Copyright (c) 2017-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
33 uint32_t copy_pkt_tx_time = 1;
34 #if (DEVELOPMENT || DEBUG)
35 SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
36 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
37 int pkt_trailers = 0; /* for testing trailing bytes */
38 SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
39 CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
40
41 SYSCTL_UINT(_kern_skywalk_packet, OID_AUTO, copy_pkt_tx_time,
42 CTLFLAG_RW | CTLFLAG_LOCKED, ©_pkt_tx_time, 0,
43 "copy tx time from pkt to mbuf");
44 #endif /* !DEVELOPMENT && !DEBUG */
45
46
/*
 * Copy "len" bytes from "src" to "dst".  When both pointers are
 * 8-byte aligned, dispatch to the specialized wide-copy helpers
 * (including fixed-size variants for the common IPv4/IPv6 header
 * lengths); otherwise fall back to plain bcopy().
 */
__attribute__((always_inline))
static inline void
_pkt_copy(void *src, void *dst, size_t len)
{
	/* unaligned buffers always take the generic path */
	if (!IS_P2ALIGNED(src, 8) || !IS_P2ALIGNED(dst, 8)) {
		bcopy(src, dst, len);
		return;
	}

	if (len == 20) {
		/* standard IPv4 header */
		sk_copy64_20(src, dst);
	} else if (len == 40) {
		/* IPv6 header */
		sk_copy64_40(src, dst);
	} else if (IS_P2ALIGNED(len, 64)) {
		sk_copy64_64x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 32)) {
		sk_copy64_32x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 8)) {
		sk_copy64_8x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 4)) {
		sk_copy64_4x(src, dst, len);
	} else {
		/* aligned buffers but irregular length */
		bcopy(src, dst, len);
	}
}
80
81 /*
82 * This routine is used for copying data across two kernel packets.
83 * Can also optionally compute 16-bit partial inet checksum as the
84 * data is copied.
85 * This routine is used by flowswitch while copying packet from vp
86 * adapter pool to packet in native netif pool and vice-a-versa.
87 *
88 * start/stuff is relative to soff, within [0, len], such that
89 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
90 */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;
	/* only compute a sum here when the source lacks a full checksum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet routine: the copy must fit in the first buffer */
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* record the partial sum on the destination packet */
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* full checksum already present; carry it across */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    " pkt 0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (copysum) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: store byte-wise */
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* propagate only TSO and TX-offload request flags */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* account for the copied bytes in the destination metadata */
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
206
207 /*
208 * NOTE: soff is the offset within the packet
209 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
210 * caller is responsible for further reducing it to 16-bit if needed,
211 * as well as to perform the final 1's complement on it.
212 */
/*
 * Copy "len" bytes starting at packet offset "soff" from packet "sph"
 * into the flat buffer "dbaddr", walking the packet's buflet chain.
 * When "do_csum" is TRUE, a 16-bit one's-complement partial sum is
 * accumulated over the copied bytes on top of "initial_sum".
 * "odd_start", if non-NULL, carries the odd/even byte phase in and out
 * so a packet can be summed across multiple calls.
 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;		/* preserved for the panic message */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t sbcnt, off0 = soff;	/* off0 preserved for the panic message */
	uint32_t clen, sboff, sblen;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* inherit the byte phase from the caller's previous segment */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		clen = (uint16_t)MIN(len, sblen);

		/* even source address and even length: sum in one pass */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* fall through to the generic multi-step path */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
		soff = 0;	/* offset applies only to the first buflet */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
			}
			needs_swap = started_on_odd;

			/* defer a trailing odd byte until after the bulk copy */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* drain "partial" before it can overflow 32 bits */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the deferred trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		len -= clen + odd;
		/* each iteration exhausts either the buflet or the request */
		ASSERT(sblen == 0 || len == 0);
	}

	/* hand the byte phase back to the caller */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
350
351 /*
352 * NOTE: Caller of this function is responsible to adjust the length and offset
353 * of the first buflet of the destination packet if (doff != 0),
354 * i.e. additional data is being prependend to the packet.
355 * It should also finalize the packet.
356 * To simplify & optimize the routine, we have also assumed that soff & doff
357 * will lie within the first buffer, which is true for the current use cases
358 * where, doff is the offset of the checksum field in the TCP/IP header and
359 * soff is the L3 offset.
360 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
361 * caller is responsible for further reducing it to 16-bit if needed,
362 * as well as to perform the final 1's complement on it.
363 */
/*
 * Copy "len" bytes from packet "sph" (starting at offset "soff") into
 * packet "dph" (starting at offset "doff"), walking both buflet chains
 * in lockstep.  When "do_csum" is TRUE the 32-bit partial one's-
 * complement sum of the copied bytes is returned via "csum_partial".
 * Destination buflet data lengths are updated as buffers fill.
 * Returns TRUE iff all "len" bytes were copied (i.e. neither chain
 * ran out of buffers early).
 */
static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
{
	uint8_t odd = 0;
	uint32_t sum = 0, partial;
	boolean_t needs_swap, started_on_odd = FALSE;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t sbcnt, dbcnt;
	uint32_t clen, dlen0, sboff, sblen, dlim;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;

	/* csum_partial is required whenever a sum was requested */
	ASSERT(csum_partial != NULL || !do_csum);
	sbcnt = __packet_get_buflet_count(sph);
	dbcnt = __packet_get_buflet_count(dph);

	while (len != 0) {
		/* at most one cursor needs refilling per iteration */
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		if (sbaddr == NULL) {
			/* advance to the next source buflet */
			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
			if (__improbable(sbuf == NULL)) {
				break;
			}
			sbufp = sbuf;
			sblen = __buflet_get_data_length(sbuf);
			sboff = __buflet_get_data_offset(sbuf);
			ASSERT(soff >= sboff);
			ASSERT(sboff + sblen > soff);
			sblen -= (soff - sboff);
			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
			soff = 0;	/* offset applies only to the first buflet */
		}

		if (dbaddr == NULL) {
			/* finalize the completed destination buflet's length */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				break;
			}
			dbufp = dbuf;
			dlim = __buflet_get_data_limit(dbuf);
			ASSERT(dlim > doff);
			dlim -= doff;
			if (doff != 0) {
				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
			}
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;	/* capacity of this buflet */
			doff = 0;	/* offset applies only to the first buflet */
		}

		/* copy no more than what both cursors can cover */
		clen = MIN(len, sblen);
		clen = MIN(clen, dlim);

		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				clen -= 1;
				dlim -= 1;
				len -= 1;
			}
			needs_swap = started_on_odd;

			/* defer a trailing odd byte until after the bulk copy */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* drain "partial" before it can overflow 32 bits */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}
		sbaddr += clen;
		dbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the deferred trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		dlim -= clen + odd;
		len -= clen + odd;

		/* mark exhausted cursors for refill on the next pass */
		if (sblen == 0) {
			sbaddr = NULL;
		}

		if (dlim == 0) {
			dbaddr = NULL;
		}
	}

	/* set the data length of the last (partially filled) buflet */
	if (__probable(dbuf != NULL)) {
		__buflet_set_data_length(dbuf, (dlen0 - dlim));
	}
	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
		*csum_partial = (uint32_t)sum;
	}
	return len == 0;
}
509
/*
 * Compute the 16-bit one's-complement partial sum over "len" bytes of
 * packet "sph" starting at packet offset "soff", walking the buflet
 * chain.  No data is copied.  Caller performs the final one's
 * complement on the returned (folded) value.
 */
uint32_t
pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
{
	uint8_t odd = 0;
	uint32_t sum = 0, partial;
	boolean_t needs_swap, started_on_odd = FALSE;
	uint8_t *sbaddr = NULL;
	uint16_t sbcnt;
	uint32_t clen, sblen, sboff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* fastpath (single buflet, even aligned, even length) */
	if (sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sblen = __buflet_get_data_length(sbuf);
		sboff = __buflet_get_data_offset(sbuf);
		ASSERT(soff >= sboff);
		ASSERT(sboff + sblen > soff);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		clen = MIN(len, sblen);

		/* even address and even length: sum in a single pass */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_cksum(sbaddr, clen, 0);
			return __packet_fold_sum(sum);
		}

		/* fall through to the generic path */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	/* slowpath */
	while (len != 0) {
		ASSERT(sbaddr == NULL);
		if (sbaddr == NULL) {
			/* advance to the next source buflet */
			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
			if (__improbable(sbuf == NULL)) {
				break;
			}
			sbufp = sbuf;
			sblen = __buflet_get_data_length(sbuf);
			sboff = __buflet_get_data_offset(sbuf);
			ASSERT(soff >= sboff);
			ASSERT(sboff + sblen > soff);
			sblen -= (soff - sboff);
			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
			soff = 0;	/* offset applies only to the first buflet */
		}

		clen = MIN(len, sblen);

		partial = 0;
		if (__improbable((uintptr_t)sbaddr & 1)) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			clen -= 1;
			len -= 1;
		}
		needs_swap = started_on_odd;

		/* defer a trailing odd byte until after the bulk sum */
		odd = clen & 1u;
		clen -= odd;

		if (clen != 0) {
			partial = __packet_cksum(sbaddr,
			    clen, partial);
		}

		/* drain "partial" before it can overflow 32 bits */
		if (__improbable(partial & 0xc0000000)) {
			if (needs_swap) {
				partial = (partial << 8) +
				    (partial >> 24);
			}
			sum += (partial >> 16);
			sum += (partial & 0xffff);
			partial = 0;
		}
		sbaddr += clen;

		if (odd != 0) {
			/* fold in the deferred trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

		sblen -= clen + odd;
		len -= clen + odd;

		/* mark an exhausted buflet for refill on the next pass */
		if (sblen == 0) {
			sbaddr = NULL;
		}
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return (uint32_t)sum;
}
631
632
633 /*
634 * This is a multi-buflet variant of pkt_copy_from_pkt().
635 *
636 * start/stuff is relative to soff, within [0, len], such that
637 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
638 */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	/* only compute a sum here when the source lacks a full checksum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	/* the copy must fit within the destination's buflet chain */
	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * NOTE(review): adjusts metadata by "start" only;
			 * lengths for the summed portion appear to be set by
			 * _pkt_copypkt_sum() itself — confirm against callers.
			 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			/* full checksum already present; carry it across */
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}
		break;

	case NR_TX:
		if (copysum) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: store byte-wise */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* propagate only TSO and TX-offload request flags */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
751
752 static inline uint32_t
_convert_mbuf_csum_flags(uint32_t mbuf_flags)753 _convert_mbuf_csum_flags(uint32_t mbuf_flags)
754 {
755 uint32_t pkt_flags = 0;
756
757 if (mbuf_flags & CSUM_TCP) {
758 pkt_flags |= PACKET_CSUM_TCP;
759 }
760 if (mbuf_flags & CSUM_TCPIPV6) {
761 pkt_flags |= PACKET_CSUM_TCPIPV6;
762 }
763 if (mbuf_flags & CSUM_UDP) {
764 pkt_flags |= PACKET_CSUM_UDP;
765 }
766 if (mbuf_flags & CSUM_UDPIPV6) {
767 pkt_flags |= PACKET_CSUM_UDPIPV6;
768 }
769 if (mbuf_flags & CSUM_IP) {
770 pkt_flags |= PACKET_CSUM_IP;
771 }
772 if (mbuf_flags & CSUM_ZERO_INVERT) {
773 pkt_flags |= PACKET_CSUM_ZERO_INVERT;
774 }
775
776 return pkt_flags;
777 }
778
779 /*
780 * This routine is used for copying an mbuf which originated in the host
781 * stack destined to a native skywalk interface (NR_TX), as well as for
782 * mbufs originating on compat network interfaces (NR_RX).
783 *
784 * start/stuff is relative to moff, within [0, len], such that
785 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
786 */
void
pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet routine: the copy must fit in the first buffer */
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		/* inherit the mbuf's RX checksum state wholesale */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum(partial);

			/* record the partial sum on the packet */
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* full checksum already present; plain copy */
			m_copydata(m, moff, len, baddr);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (copysum) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: store byte-wise */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
		} else {
			m_copydata(m, moff, len, baddr);
		}
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO requests (IPv4 and IPv6 are exclusive) */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}
		/* if we didn't checksum here, pass the request downstream */
		if (!copysum) {
			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		/* carry NECP policy attribution across */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* account for the copied bytes in the packet metadata */
	METADATA_ADJUST_LEN(pkt, len, poff);

	/* preserve link-layer broadcast/multicast classification */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
983
984 /*
985 * Like m_copydata_sum(), but works on a destination kernel packet.
986 */
987 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)988 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
989 uint32_t len, boolean_t do_cscum)
990 {
991 boolean_t needs_swap, started_on_odd = FALSE;
992 int off0 = soff;
993 uint32_t len0 = len;
994 struct mbuf *m0 = m;
995 uint32_t sum = 0, partial;
996 unsigned count0, count, odd, mlen_copied;
997 uint8_t *sbaddr = NULL, *dbaddr = NULL;
998 uint16_t dbcnt = __packet_get_buflet_count(dph);
999 uint32_t dlim, dlen0;
1000 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1001 kern_buflet_t dbuf = NULL, dbufp = NULL;
1002
1003 while (soff > 0) {
1004 if (__improbable(m == NULL)) {
1005 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1006 __func__, m0, off0, len0);
1007 /* NOTREACHED */
1008 __builtin_unreachable();
1009 }
1010 if (soff < m->m_len) {
1011 break;
1012 }
1013 soff -= m->m_len;
1014 m = m->m_next;
1015 }
1016
1017 if (__improbable(m == NULL)) {
1018 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1019 __func__, m0, off0, len0);
1020 /* NOTREACHED */
1021 __builtin_unreachable();
1022 }
1023
1024 sbaddr = mtod(m, uint8_t *) + soff;
1025 count = m->m_len - soff;
1026 mlen_copied = 0;
1027
1028 while (len != 0) {
1029 ASSERT(sbaddr == NULL || dbaddr == NULL);
1030 if (sbaddr == NULL) {
1031 soff = 0;
1032 m = m->m_next;
1033 if (__improbable(m == NULL)) {
1034 panic("%s: invalid mbuf chain %p [off %d, "
1035 "len %d]", __func__, m0, off0, len0);
1036 /* NOTREACHED */
1037 __builtin_unreachable();
1038 }
1039 sbaddr = mtod(m, uint8_t *);
1040 count = m->m_len;
1041 mlen_copied = 0;
1042 }
1043
1044 if (__improbable(count == 0)) {
1045 sbaddr = NULL;
1046 continue;
1047 }
1048
1049 if (dbaddr == NULL) {
1050 if (dbufp != NULL) {
1051 __buflet_set_data_length(dbufp, dlen0);
1052 }
1053
1054 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1055 if (__improbable(dbuf == NULL)) {
1056 panic("%s: mbuf too large %p [off %d, "
1057 "len %d]", __func__, m0, off0, len0);
1058 /* NOTREACHED */
1059 __builtin_unreachable();
1060 }
1061 dbufp = dbuf;
1062 dlim = __buflet_get_data_limit(dbuf) - doff;
1063 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
1064 dlen0 = dlim;
1065 doff = 0;
1066 }
1067
1068 count = MIN(count, (unsigned)len);
1069 count0 = count = MIN(count, dlim);
1070
1071 if (!do_cscum) {
1072 _pkt_copy(sbaddr, dbaddr, count);
1073 sbaddr += count;
1074 dbaddr += count;
1075 goto skip_csum;
1076 }
1077
1078 partial = 0;
1079 if ((uintptr_t)sbaddr & 1) {
1080 /* Align on word boundary */
1081 started_on_odd = !started_on_odd;
1082 #if BYTE_ORDER == LITTLE_ENDIAN
1083 partial = *sbaddr << 8;
1084 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1085 partial = *sbaddr;
1086 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1087 *dbaddr++ = *sbaddr++;
1088 count -= 1;
1089 }
1090
1091 needs_swap = started_on_odd;
1092 odd = count & 1u;
1093 count -= odd;
1094
1095 if (count) {
1096 partial = __packet_copy_and_sum(sbaddr,
1097 dbaddr, count, partial);
1098 sbaddr += count;
1099 dbaddr += count;
1100 if (__improbable(partial & 0xc0000000)) {
1101 if (needs_swap) {
1102 partial = (partial << 8) +
1103 (partial >> 24);
1104 }
1105 sum += (partial >> 16);
1106 sum += (partial & 0xffff);
1107 partial = 0;
1108 }
1109 }
1110
1111 if (odd) {
1112 #if BYTE_ORDER == LITTLE_ENDIAN
1113 partial += *sbaddr;
1114 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1115 partial += *sbaddr << 8;
1116 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1117 *dbaddr++ = *sbaddr++;
1118 started_on_odd = !started_on_odd;
1119 }
1120
1121 if (needs_swap) {
1122 partial = (partial << 8) + (partial >> 24);
1123 }
1124 sum += (partial >> 16) + (partial & 0xffff);
1125 /*
1126 * Reduce sum to allow potential byte swap
1127 * in the next iteration without carry.
1128 */
1129 sum = (sum >> 16) + (sum & 0xffff);
1130
1131 skip_csum:
1132 dlim -= count0;
1133 len -= count0;
1134 mlen_copied += count0;
1135
1136 if (dlim == 0) {
1137 dbaddr = NULL;
1138 }
1139
1140 count = m->m_len - soff - mlen_copied;
1141 if (count == 0) {
1142 sbaddr = NULL;
1143 }
1144 }
1145
1146 ASSERT(len == 0);
1147 ASSERT(dbuf != NULL);
1148 __buflet_set_data_length(dbuf, (dlen0 - dlim));
1149
1150 if (!do_cscum) {
1151 return 0;
1152 }
1153
1154 /* Final fold (reduce 32-bit to 16-bit) */
1155 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1156 sum = (sum >> 16) + (sum & 0xffff);
1157 return sum;
1158 }
1159
1160 /*
1161 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1162 *
1163 * start/stuff is relative to moff, within [0, len], such that
1164 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1165 */
1166 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1167 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1168 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1169 const uint32_t len, const boolean_t copysum, const uint16_t start)
1170 {
1171 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1172 uint32_t partial;
1173 uint16_t csum = 0;
1174 uint8_t *baddr;
1175
1176 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1177
1178 /* get buffer address from packet */
1179 MD_BUFLET_ADDR_ABS(pkt, baddr);
1180 ASSERT(baddr != NULL);
1181 baddr += poff;
1182 VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1183 __packet_get_buflet_count(ph)));
1184
1185 switch (t) {
1186 case NR_RX:
1187 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1188 pkt->pkt_csum_rx_start_off = 0;
1189 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1190 pkt->pkt_svc_class = m_get_service_class(m);
1191 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1192 != CSUM_RX_FULL_FLAGS) && copysum)) {
1193 /*
1194 * Use m_copydata() to copy the portion up to the
1195 * point where we need to start the checksum, and
1196 * copy the remainder, checksumming as we go.
1197 */
1198 if (start != 0) {
1199 m_copydata(m, moff, start, baddr);
1200 }
1201 partial = m_copypkt_sum(m, start, ph, (poff + start),
1202 (len - start), TRUE);
1203 csum = __packet_fold_sum(partial);
1204 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1205 start, csum, FALSE);
1206 METADATA_ADJUST_LEN(pkt, start, poff);
1207 } else {
1208 (void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1209 }
1210 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1211 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1212 sk_proc_name_address(current_proc()),
1213 sk_proc_pid(current_proc()), len,
1214 (copysum ? (len - start) : 0), csum, start);
1215 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1216 " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
1217 SK_KVA(m), m->m_pkthdr.csum_flags,
1218 (uint32_t)m->m_pkthdr.csum_rx_start,
1219 (uint32_t)m->m_pkthdr.csum_rx_val);
1220 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1221 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1222 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1223 (uint32_t)pkt->pkt_csum_rx_start_off,
1224 (uint32_t)pkt->pkt_csum_rx_value);
1225 break;
1226
1227 case NR_TX:
1228 if (copysum) {
1229 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1230 /*
1231 * Use m_copydata() to copy the portion up to the
1232 * point where we need to start the checksum, and
1233 * copy the remainder, checksumming as we go.
1234 */
1235 if (start != 0) {
1236 m_copydata(m, moff, start, baddr);
1237 }
1238 partial = m_copypkt_sum(m, start, ph, (poff + start),
1239 (len - start), TRUE);
1240 csum = __packet_fold_sum_final(partial);
1241
1242 /*
1243 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1244 * ideally we'd only test for CSUM_ZERO_INVERT
1245 * here, but catch cases where the originator
1246 * did not set it for UDP.
1247 */
1248 if (csum == 0 && (m->m_pkthdr.csum_flags &
1249 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1250 csum = 0xffff;
1251 }
1252
1253 /* Insert checksum into packet */
1254 ASSERT(stuff <= (len - sizeof(csum)));
1255 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1256 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1257 } else {
1258 bcopy((void *)&csum, baddr + stuff,
1259 sizeof(csum));
1260 }
1261 METADATA_ADJUST_LEN(pkt, start, poff);
1262 } else {
1263 m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1264 }
1265 pkt->pkt_csum_flags = 0;
1266 pkt->pkt_csum_tx_start_off = 0;
1267 pkt->pkt_csum_tx_stuff_off = 0;
1268
1269 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1270 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
1271 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1272 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1273 }
1274 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1275 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
1276 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1277 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1278 }
1279 if (!copysum) {
1280 pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
1281 }
1282
1283 /* translate mbuf metadata */
1284 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1285 pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1286 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1287 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1288 switch (m->m_pkthdr.pkt_proto) {
1289 case IPPROTO_QUIC:
1290 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1291 pkt->pkt_transport_protocol = IPPROTO_QUIC;
1292 break;
1293
1294 default:
1295 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1296 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1297 break;
1298 }
1299 (void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1300 pkt->pkt_svc_class = m_get_service_class(m);
1301 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1302 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1303 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1304 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1305 }
1306 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1307 pkt->pkt_pflags |= PKT_F_L4S;
1308 }
1309 necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1310 pkt->pkt_policy_id =
1311 (uint32_t)necp_get_policy_id_from_packet(m);
1312
1313 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1314 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1315 __packet_set_tx_completion_data(ph,
1316 m->m_pkthdr.drv_tx_compl_arg,
1317 m->m_pkthdr.drv_tx_compl_data);
1318 }
1319 pkt->pkt_tx_compl_context =
1320 m->m_pkthdr.pkt_compl_context;
1321 pkt->pkt_tx_compl_callbacks =
1322 m->m_pkthdr.pkt_compl_callbacks;
1323 /*
1324 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1325 * mbuf can no longer trigger a completion callback.
1326 * callback will be invoked when the kernel packet is
1327 * completed.
1328 */
1329 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1330
1331 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1332 }
1333
1334 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1335 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1336 sk_proc_name_address(current_proc()),
1337 sk_proc_pid(current_proc()), len,
1338 (copysum ? (len - start) : 0), csum, start);
1339 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1340 " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
1341 SK_KVA(m), m->m_pkthdr.csum_flags,
1342 (uint32_t)m->m_pkthdr.csum_tx_start,
1343 (uint32_t)m->m_pkthdr.csum_tx_stuff);
1344 break;
1345
1346 default:
1347 VERIFY(0);
1348 /* NOTREACHED */
1349 __builtin_unreachable();
1350 }
1351
1352 if (m->m_flags & M_BCAST) {
1353 __packet_set_link_broadcast(ph);
1354 } else if (m->m_flags & M_MCAST) {
1355 __packet_set_link_multicast(ph);
1356 }
1357
1358 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1359 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1360 (t == NR_RX) ? "RX" : "TX",
1361 sk_dump("buf", baddr, len, 128, NULL, 0));
1362 }
1363
1364 static inline uint32_t
_convert_pkt_csum_flags(uint32_t pkt_flags)1365 _convert_pkt_csum_flags(uint32_t pkt_flags)
1366 {
1367 uint32_t mbuf_flags = 0;
1368 if (pkt_flags & PACKET_CSUM_TCP) {
1369 mbuf_flags |= CSUM_TCP;
1370 }
1371 if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1372 mbuf_flags |= CSUM_TCPIPV6;
1373 }
1374 if (pkt_flags & PACKET_CSUM_UDP) {
1375 mbuf_flags |= CSUM_UDP;
1376 }
1377 if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1378 mbuf_flags |= CSUM_UDPIPV6;
1379 }
1380 if (pkt_flags & PACKET_CSUM_IP) {
1381 mbuf_flags |= CSUM_IP;
1382 }
1383 if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1384 mbuf_flags |= CSUM_ZERO_INVERT;
1385 }
1386
1387 return mbuf_flags;
1388 }
1389
1390 /*
1391 * This routine is used for copying from a packet originating from a native
1392 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1393 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1394 *
1395 * We do adjust the length to reflect the total data span.
1396 *
1397 * This routine supports copying into an mbuf chain for RX but not TX.
1398 *
1399 * start/stuff is relative to poff, within [0, len], such that
1400 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1401 */
1402 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1403 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1404 struct mbuf *m, const uint16_t moff, const uint32_t len,
1405 const boolean_t copysum, const uint16_t start)
1406 {
1407 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1408 struct mbuf *curr_m;
1409 uint32_t partial = 0;
1410 uint32_t remaining_len = len, copied_len = 0;
1411 uint16_t csum = 0;
1412 uint8_t *baddr;
1413 uint8_t *dp;
1414 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1415
1416 ASSERT(len >= start);
1417 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1418
1419 /* get buffer address from packet */
1420 MD_BUFLET_ADDR_ABS(pkt, baddr);
1421 ASSERT(baddr != NULL);
1422 baddr += poff;
1423 VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1424
1425 ASSERT((m->m_flags & M_PKTHDR));
1426 m->m_data += moff;
1427
1428 switch (t) {
1429 case NR_RX:
1430 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1431
1432 /*
1433 * Use pkt_copy() to copy the portion up to the
1434 * point where we need to start the checksum, and
1435 * copy the remainder, checksumming as we go.
1436 */
1437 if (__probable(do_sum && start != 0)) {
1438 ASSERT(M_TRAILINGSPACE(m) >= start);
1439 ASSERT(m->m_len == 0);
1440 dp = (uint8_t *)m->m_data;
1441 _pkt_copy(baddr, dp, start);
1442 remaining_len -= start;
1443 copied_len += start;
1444 m->m_len += start;
1445 m->m_pkthdr.len += start;
1446 }
1447 curr_m = m;
1448 while (curr_m != NULL && remaining_len != 0) {
1449 uint32_t tmp_len = MIN(remaining_len,
1450 (uint32_t)M_TRAILINGSPACE(curr_m));
1451 dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
1452 if (__probable(do_sum)) {
1453 partial = __packet_copy_and_sum((baddr + copied_len),
1454 dp, tmp_len, partial);
1455 } else {
1456 _pkt_copy((baddr + copied_len), dp, tmp_len);
1457 }
1458
1459 curr_m->m_len += tmp_len;
1460 m->m_pkthdr.len += tmp_len;
1461 copied_len += tmp_len;
1462 remaining_len -= tmp_len;
1463 curr_m = curr_m->m_next;
1464 }
1465 ASSERT(remaining_len == 0);
1466
1467 if (__probable(do_sum)) {
1468 csum = __packet_fold_sum(partial);
1469
1470 m->m_pkthdr.csum_flags |=
1471 (CSUM_DATA_VALID | CSUM_PARTIAL);
1472 m->m_pkthdr.csum_rx_start = start;
1473 m->m_pkthdr.csum_rx_val = csum;
1474 } else {
1475 m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1476 m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1477 _CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1478 m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1479 if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1480 m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1481 }
1482 }
1483
1484 /* translate packet metadata */
1485 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1486 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1487
1488 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1489 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1490 sk_proc_name_address(current_proc()),
1491 sk_proc_pid(current_proc()), len,
1492 (copysum ? (len - start) : 0), csum, start);
1493 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1494 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1495 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1496 (uint32_t)m->m_pkthdr.csum_rx_start,
1497 (uint32_t)m->m_pkthdr.csum_rx_val);
1498 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1499 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1500 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1501 (uint32_t)pkt->pkt_csum_rx_start_off,
1502 (uint32_t)pkt->pkt_csum_rx_value);
1503 break;
1504
1505 case NR_TX:
1506 dp = (uint8_t *)m->m_data;
1507 ASSERT(m->m_next == NULL);
1508
1509 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1510 (uint32_t)mbuf_maxlen(m));
1511 m->m_len += len;
1512 m->m_pkthdr.len += len;
1513 VERIFY(m->m_len == m->m_pkthdr.len &&
1514 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1515
1516 if (copysum) {
1517 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1518 /*
1519 * Use pkt_copy() to copy the portion up to the
1520 * point where we need to start the checksum, and
1521 * copy the remainder, checksumming as we go.
1522 */
1523 if (__probable(start != 0)) {
1524 _pkt_copy(baddr, dp, start);
1525 }
1526 partial = __packet_copy_and_sum((baddr + start),
1527 (dp + start), (len - start), 0);
1528 csum = __packet_fold_sum_final(partial);
1529
1530 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1531 if (csum == 0 &&
1532 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1533 csum = 0xffff;
1534 }
1535
1536 /* Insert checksum into packet */
1537 ASSERT(stuff <= (len - sizeof(csum)));
1538 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1539 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1540 } else {
1541 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1542 }
1543 } else {
1544 _pkt_copy(baddr, dp, len);
1545 }
1546 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1547 m->m_pkthdr.csum_tx_start = 0;
1548 m->m_pkthdr.csum_tx_stuff = 0;
1549 m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1550
1551 /* translate packet metadata */
1552 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1553 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1554 m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1555 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1556 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1557 m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1558 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1559 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1560 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1561 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1562 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1563 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1564 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1565 }
1566 if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1567 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1568 }
1569 if (__improbable(copy_pkt_tx_time != 0 &&
1570 (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1571 struct m_tag *tag = NULL;
1572 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1573 sizeof(uint64_t), M_WAITOK, m);
1574 if (tag != NULL) {
1575 m_tag_prepend(m, tag);
1576 *(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1577 }
1578 }
1579
1580 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1581 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1582 sk_proc_name_address(current_proc()),
1583 sk_proc_pid(current_proc()), len,
1584 (copysum ? (len - start) : 0), csum, start);
1585 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1586 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1587 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1588 (uint32_t)pkt->pkt_csum_tx_start_off,
1589 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1590 break;
1591
1592 default:
1593 VERIFY(0);
1594 /* NOTREACHED */
1595 __builtin_unreachable();
1596 }
1597
1598 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1599 m->m_flags |= M_BCAST;
1600 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1601 m->m_flags |= M_MCAST;
1602 }
1603 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1604 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1605 (t == NR_RX) ? "RX" : "TX",
1606 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1607 }
1608
1609 /*
1610 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1611 * NOTE: poff is the offset within the packet.
1612 *
1613 * This routine supports copying into an mbuf chain for RX but not TX.
1614 *
1615 * start/stuff is relative to poff, within [0, len], such that
1616 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1617 */
1618 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1619 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1620 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1621 const uint32_t len, const boolean_t copysum, const uint16_t start)
1622 {
1623 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1624 struct mbuf *curr_m;
1625 uint32_t partial = 0;
1626 uint32_t remaining_len = len, copied_len = 0;
1627 uint16_t csum = 0;
1628 uint8_t *baddr;
1629 uint8_t *dp;
1630 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1631
1632 ASSERT(len >= start);
1633 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1634
1635 /* get buffer address from packet */
1636 MD_BUFLET_ADDR_ABS(pkt, baddr);
1637 ASSERT(baddr != NULL);
1638 baddr += poff;
1639 VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1640 __packet_get_buflet_count(ph)));
1641
1642 ASSERT((m->m_flags & M_PKTHDR));
1643 m->m_data += moff;
1644
1645 switch (t) {
1646 case NR_RX:
1647 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1648 if (__probable(do_sum && start != 0)) {
1649 ASSERT(M_TRAILINGSPACE(m) >= start);
1650 ASSERT(m->m_len == 0);
1651 dp = (uint8_t *)m->m_data;
1652 _pkt_copy(baddr, dp, start);
1653 remaining_len -= start;
1654 copied_len += start;
1655 m->m_len += start;
1656 m->m_pkthdr.len += start;
1657 }
1658 curr_m = m;
1659 while (curr_m != NULL && remaining_len != 0) {
1660 uint32_t tmp_len = MIN(remaining_len,
1661 (uint32_t)M_TRAILINGSPACE(curr_m));
1662 uint16_t soff = poff + (uint16_t)copied_len;
1663 dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
1664
1665 if (__probable(do_sum)) {
1666 partial = _pkt_copyaddr_sum(ph, soff,
1667 dp, tmp_len, TRUE, partial, NULL);
1668 } else {
1669 pkt_copyaddr_sum(ph, soff,
1670 dp, tmp_len, FALSE, 0, NULL);
1671 }
1672
1673 curr_m->m_len += tmp_len;
1674 m->m_pkthdr.len += tmp_len;
1675 copied_len += tmp_len;
1676 remaining_len -= tmp_len;
1677 curr_m = curr_m->m_next;
1678 }
1679 ASSERT(remaining_len == 0);
1680
1681 if (__probable(do_sum)) {
1682 csum = __packet_fold_sum(partial);
1683
1684 m->m_pkthdr.csum_flags |=
1685 (CSUM_DATA_VALID | CSUM_PARTIAL);
1686 m->m_pkthdr.csum_rx_start = start;
1687 m->m_pkthdr.csum_rx_val = csum;
1688 } else {
1689 m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1690 m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1691 _CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1692 m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1693 if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1694 m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1695 }
1696 }
1697
1698 /* translate packet metadata */
1699 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1700 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1701
1702 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1703 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1704 sk_proc_name_address(current_proc()),
1705 sk_proc_pid(current_proc()), len,
1706 (copysum ? (len - start) : 0), csum, start);
1707 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1708 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1709 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1710 (uint32_t)m->m_pkthdr.csum_rx_start,
1711 (uint32_t)m->m_pkthdr.csum_rx_val);
1712 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1713 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1714 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1715 (uint32_t)pkt->pkt_csum_rx_start_off,
1716 (uint32_t)pkt->pkt_csum_rx_value);
1717 break;
1718 case NR_TX:
1719 dp = (uint8_t *)m->m_data;
1720 ASSERT(m->m_next == NULL);
1721 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1722 (uint32_t)mbuf_maxlen(m));
1723 m->m_len += len;
1724 m->m_pkthdr.len += len;
1725 VERIFY(m->m_len == m->m_pkthdr.len &&
1726 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1727 if (copysum) {
1728 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1729 /*
1730 * Use pkt_copy() to copy the portion up to the
1731 * point where we need to start the checksum, and
1732 * copy the remainder, checksumming as we go.
1733 */
1734 if (__probable(start != 0)) {
1735 _pkt_copy(baddr, dp, start);
1736 }
1737 partial = _pkt_copyaddr_sum(ph, (poff + start),
1738 (dp + start), (len - start), TRUE, 0, NULL);
1739 csum = __packet_fold_sum_final(partial);
1740
1741 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1742 if (csum == 0 &&
1743 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1744 csum = 0xffff;
1745 }
1746
1747 /* Insert checksum into packet */
1748 ASSERT(stuff <= (len - sizeof(csum)));
1749 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1750 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1751 } else {
1752 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1753 }
1754 } else {
1755 (void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1756 }
1757 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1758 m->m_pkthdr.csum_tx_start = 0;
1759 m->m_pkthdr.csum_tx_stuff = 0;
1760 m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1761
1762 /* translate packet metadata */
1763 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1764 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1765 m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1766 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1767 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1768 m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1769 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1770 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1771 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1772 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1773 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1774 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1775 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1776 }
1777 if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1778 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1779 }
1780 if (__improbable(copy_pkt_tx_time != 0 &&
1781 (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1782 struct m_tag *tag = NULL;
1783 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1784 sizeof(uint64_t), M_WAITOK, m);
1785 if (tag != NULL) {
1786 m_tag_prepend(m, tag);
1787 *(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1788 }
1789 }
1790
1791 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1792 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1793 sk_proc_name_address(current_proc()),
1794 sk_proc_pid(current_proc()), len,
1795 (copysum ? (len - start) : 0), csum, start);
1796 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1797 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1798 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1799 (uint32_t)pkt->pkt_csum_tx_start_off,
1800 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1801 break;
1802
1803 default:
1804 VERIFY(0);
1805 /* NOTREACHED */
1806 __builtin_unreachable();
1807 }
1808
1809 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1810 m->m_flags |= M_BCAST;
1811 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1812 m->m_flags |= M_MCAST;
1813 }
1814 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1815 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1816 (t == NR_RX) ? "RX" : "TX",
1817 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1818 }
1819
1820 /*
1821 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1822 * Caller can provide an initial sum to be folded into the computed
1823 * sum. The accumulated partial sum (32-bit) is returned to caller;
1824 * caller is responsible for further reducing it to 16-bit if needed,
1825 * as well as to perform the final 1's complement on it.
1826 */
1827 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1828 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1829 boolean_t *odd_start)
1830 {
1831 boolean_t needs_swap, started_on_odd = FALSE;
1832 int off0 = off, len0 = len;
1833 struct mbuf *m0 = m;
1834 uint64_t sum, partial;
1835 unsigned count, odd;
1836 char *cp = vp;
1837
1838 if (__improbable(off < 0 || len < 0)) {
1839 panic("%s: invalid offset %d or len %d", __func__, off, len);
1840 /* NOTREACHED */
1841 __builtin_unreachable();
1842 }
1843
1844 while (off > 0) {
1845 if (__improbable(m == NULL)) {
1846 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1847 __func__, m0, off0, len0);
1848 /* NOTREACHED */
1849 __builtin_unreachable();
1850 }
1851 if (off < m->m_len) {
1852 break;
1853 }
1854 off -= m->m_len;
1855 m = m->m_next;
1856 }
1857
1858 if (odd_start) {
1859 started_on_odd = *odd_start;
1860 }
1861 sum = initial_sum;
1862
1863 for (; len > 0; m = m->m_next) {
1864 uint8_t *datap;
1865
1866 if (__improbable(m == NULL)) {
1867 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1868 __func__, m0, off0, len0);
1869 /* NOTREACHED */
1870 __builtin_unreachable();
1871 }
1872
1873 datap = mtod(m, uint8_t *) + off;
1874 count = m->m_len;
1875
1876 if (__improbable(count == 0)) {
1877 continue;
1878 }
1879
1880 count = MIN(count - off, (unsigned)len);
1881 partial = 0;
1882
1883 if ((uintptr_t)datap & 1) {
1884 /* Align on word boundary */
1885 started_on_odd = !started_on_odd;
1886 #if BYTE_ORDER == LITTLE_ENDIAN
1887 partial = *datap << 8;
1888 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1889 partial = *datap;
1890 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1891 *cp++ = *datap++;
1892 count -= 1;
1893 len -= 1;
1894 }
1895
1896 needs_swap = started_on_odd;
1897 odd = count & 1u;
1898 count -= odd;
1899
1900 if (count) {
1901 partial = __packet_copy_and_sum(datap,
1902 cp, count, (uint32_t)partial);
1903 datap += count;
1904 cp += count;
1905 len -= count;
1906 if (__improbable((partial & (3ULL << 62)) != 0)) {
1907 if (needs_swap) {
1908 partial = (partial << 8) +
1909 (partial >> 56);
1910 }
1911 sum += (partial >> 32);
1912 sum += (partial & 0xffffffff);
1913 partial = 0;
1914 }
1915 }
1916
1917 if (odd) {
1918 #if BYTE_ORDER == LITTLE_ENDIAN
1919 partial += *datap;
1920 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1921 partial += *datap << 8;
1922 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1923 *cp++ = *datap++;
1924 len -= 1;
1925 started_on_odd = !started_on_odd;
1926 }
1927 off = 0;
1928
1929 if (needs_swap) {
1930 partial = (partial << 8) + (partial >> 24);
1931 }
1932 sum += (partial >> 32) + (partial & 0xffffffff);
1933 /*
1934 * Reduce sum to allow potential byte swap
1935 * in the next iteration without carry.
1936 */
1937 sum = (sum >> 32) + (sum & 0xffffffff);
1938 }
1939
1940 if (odd_start) {
1941 *odd_start = started_on_odd;
1942 }
1943
1944 /* Final fold (reduce 64-bit to 32-bit) */
1945 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1946 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1947
1948 /* return 32-bit partial sum to caller */
1949 return (uint32_t)sum;
1950 }
1951
1952 #if DEBUG || DEVELOPMENT
1953 #define TRAILERS_MAX 16 /* max trailing bytes */
1954 #define TRAILERS_REGEN (64 * 1024) /* regeneration threshold */
1955 static uint8_t tb[TRAILERS_MAX]; /* random trailing bytes */
1956 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1957
1958 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1959 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1960 {
1961 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1962 uint32_t extra;
1963 uint8_t *baddr;
1964
1965 /* get buffer address from packet */
1966 MD_BUFLET_ADDR_ABS(pkt, baddr);
1967 ASSERT(baddr != NULL);
1968 ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1969
1970 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1971 if (extra == 0 || extra > sizeof(tb) ||
1972 (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1973 return 0;
1974 }
1975
1976 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1977 if (regen++ == TRAILERS_REGEN) {
1978 read_frandom(&tb[0], sizeof(tb));
1979 regen = 0;
1980 }
1981
1982 bcopy(&tb[0], (baddr + len), extra);
1983
1984 /* recompute partial sum (also to exercise related logic) */
1985 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1986 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1987 ((len + extra) - start), 0);
1988 pkt->pkt_csum_rx_start_off = start;
1989
1990 return extra;
1991 }
1992
1993 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1994 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1995 {
1996 uint32_t extra;
1997
1998 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1999 if (extra == 0 || extra > sizeof(tb)) {
2000 return 0;
2001 }
2002
2003 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2004 return 0;
2005 }
2006
2007 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2008 if (regen++ == TRAILERS_REGEN) {
2009 read_frandom(&tb[0], sizeof(tb));
2010 regen = 0;
2011 }
2012
2013 /* recompute partial sum (also to exercise related logic) */
2014 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2015 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2016 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2017 m->m_pkthdr.csum_rx_start = start;
2018
2019 return extra;
2020 }
2021 #endif /* DEBUG || DEVELOPMENT */
2022
/*
 * Copy "len" bytes at offset "soff" from packet "sph" into packet "dph"
 * at offset "doff"; when do_csum is set, _pkt_copypkt_sum presumably also
 * folds a partial checksum into *partial (verify against its definition).
 * Panics via VERIFY if the underlying copy reports failure.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
2029
/*
 * Copy "len" bytes at offset "soff" from packet "sph" to the buffer at
 * "dbaddr".  Thin exported wrapper around _pkt_copyaddr_sum(); returns
 * its result (presumably the running 32-bit partial checksum seeded with
 * initial_sum when do_csum is set — confirm against _pkt_copyaddr_sum).
 * *odd_start tracks odd-byte alignment across successive calls.
 */
uint32_t
pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
}
2036
2037 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)2038 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2039 uint16_t len, boolean_t do_cscum)
2040 {
2041 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2042 }
2043
2044 void
pkt_copy(void * src,void * dst,size_t len)2045 pkt_copy(void *src, void *dst, size_t len)
2046 {
2047 return _pkt_copy(src, dst, len);
2048 }
2049