1 /*
2 * Copyright (c) 2017-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32
#if (DEVELOPMENT || DEBUG)

/*
 * COPY_LOG gates the per-packet SK_DF/SK_PDF logging in the copy
 * routines below; per-packet logging is wasteful in release builds,
 * so it is only defined for DEVELOPMENT/DEBUG kernels.  In release
 * builds the identifier is undefined and therefore evaluates to 0
 * in the "#if COPY_LOG" checks, compiling the logging out.
 */
#define COPY_LOG 1

/* kern.skywalk.packet sysctl subtree (DEV/DEBUG kernels only) */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0;    /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* DEVELOPMENT || DEBUG */
44
45
46 __attribute__((always_inline))
47 static inline void
_pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)48 _pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
49 {
50 if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
51 switch (len) {
52 case 20: /* standard IPv4 header */
53 sk_copy64_20(src, dst);
54 return;
55
56 case 40: /* IPv6 header */
57 sk_copy64_40(src, dst);
58 return;
59
60 default:
61 if (IS_P2ALIGNED(len, 64)) {
62 sk_copy64_64x(src, dst, len);
63 return;
64 } else if (IS_P2ALIGNED(len, 32)) {
65 sk_copy64_32x(src, dst, len);
66 return;
67 } else if (IS_P2ALIGNED(len, 8)) {
68 sk_copy64_8x(src, dst, len);
69 return;
70 } else if (IS_P2ALIGNED(len, 4)) {
71 sk_copy64_4x(src, dst, len);
72 return;
73 }
74 break;
75 }
76 }
77 bcopy(src, dst, len);
78 }
79
80 /*
81 * This routine is used for copying data across two kernel packets.
82 * Can also optionally compute 16-bit partial inet checksum as the
83 * data is copied.
84 * This routine is used by flowswitch while copying packet from vp
85 * adapter pool to packet in native netif pool and vice-a-versa.
86 *
87 * start/stuff is relative to soff, within [0, len], such that
88 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
89 */
90 void
pkt_copy_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)91 pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
92 kern_packet_t sph, const uint16_t soff, const uint32_t len,
93 const boolean_t copysum, const uint16_t start, const uint16_t stuff,
94 const boolean_t invert)
95 {
96 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
97 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
98 uint32_t partial;
99 uint16_t csum = 0;
100 uint8_t *sbaddr, *dbaddr;
101 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
102
103 static_assert(sizeof(csum) == sizeof(uint16_t));
104
105 /* get buffer address from packet */
106 MD_BUFLET_ADDR_ABS(spkt, sbaddr);
107 ASSERT(sbaddr != NULL);
108 sbaddr += soff;
109 MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
110 ASSERT(dbaddr != NULL);
111 dbaddr += doff;
112 VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
113
114 switch (t) {
115 case NR_RX:
116 dpkt->pkt_csum_flags = 0;
117 if (__probable(do_sum)) {
118 /*
119 * Use pkt_copy() to copy the portion up to the
120 * point where we need to start the checksum, and
121 * copy the remainder, checksumming as we go.
122 */
123 if (__probable(start != 0)) {
124 _pkt_copy(sbaddr, dbaddr, start);
125 }
126 partial = __packet_copy_and_sum((sbaddr + start),
127 (dbaddr + start), (len - start), 0);
128 csum = __packet_fold_sum(partial);
129
130 __packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
131 start, csum, FALSE);
132 } else {
133 _pkt_copy(sbaddr, dbaddr, len);
134 dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
135 dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
136 dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
137 }
138
139 #if COPY_LOG
140 SK_DF(SK_VERB_COPY | SK_VERB_RX,
141 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
142 sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
143 len, (copysum ? (len - start) : 0), csum, start);
144 SK_DF(SK_VERB_COPY | SK_VERB_RX,
145 " pkt %p doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
146 SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
147 (uint32_t)dpkt->pkt_csum_rx_start_off,
148 (uint32_t)dpkt->pkt_csum_rx_value);
149 #endif
150 break;
151
152 case NR_TX:
153 if (copysum) {
154 /*
155 * Use pkt_copy() to copy the portion up to the
156 * point where we need to start the checksum, and
157 * copy the remainder, checksumming as we go.
158 */
159 if (__probable(start != 0)) {
160 _pkt_copy(sbaddr, dbaddr, start);
161 }
162 partial = __packet_copy_and_sum((sbaddr + start),
163 (dbaddr + start), (len - start), 0);
164 csum = __packet_fold_sum_final(partial);
165
166 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
167 if (csum == 0 && invert) {
168 csum = 0xffff;
169 }
170
171 /* Insert checksum into packet */
172 ASSERT(stuff <= (len - sizeof(csum)));
173 if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
174 *(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
175 } else {
176 bcopy((void *)&csum, dbaddr + stuff,
177 sizeof(csum));
178 }
179 } else {
180 _pkt_copy(sbaddr, dbaddr, len);
181 }
182 dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
183 (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
184 dpkt->pkt_csum_tx_start_off = 0;
185 dpkt->pkt_csum_tx_stuff_off = 0;
186
187 #if COPY_LOG
188 SK_DF(SK_VERB_COPY | SK_VERB_TX,
189 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
190 sk_proc_name(current_proc()),
191 sk_proc_pid(current_proc()), len,
192 (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
193 #endif
194 break;
195
196 default:
197 VERIFY(0);
198 /* NOTREACHED */
199 __builtin_unreachable();
200 }
201 METADATA_ADJUST_LEN(dpkt, len, doff);
202
203 #if COPY_LOG
204 SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
205 sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
206 (t == NR_RX) ? "RX" : "TX",
207 sk_dump("buf", dbaddr, len, 128));
208 #endif
209 }
210
211 /*
212 * NOTE: soff is the offset within the packet
213 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
214 * caller is responsible for further reducing it to 16-bit if needed,
215 * as well as to perform the final 1's complement on it.
216 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;            /* original length, for panic message */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t sbcnt, off0 = soff;    /* original offset, for panic message */
	uint32_t clen, sboff, sblen;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry over the odd/even byte phase from a previous segment */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;

		clen = (uint16_t)MIN(len, sblen);

		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* odd address or odd length: fall through to the slowpath */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
		/* "soff" only applies to the first buflet */
		soff = 0;
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				/*
				 * -fbounds-safety: *dbaddr++ = *sbaddr++ fails
				 * to compile. But the following works. Also,
				 * grouping dbaddr and len updates led to higher
				 * throughput performance, compared to doing
				 * dbaddr++; sbaddr++; len -= 1; in that order.
				 */
				*dbaddr = *sbaddr;
				dbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
				sbaddr++;
			}
			/* data summed so far is byte-swapped relative to sum */
			needs_swap = started_on_odd;

			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* fold early to avoid 32-bit overflow of "partial" */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		/*
		 * Updating len before updating sbaddr led to faster throughput
		 * than doing: dbaddr += clen; sbaddr += clen;
		 * len -= clen + odd;
		 */
		len -= clen;
		sblen -= clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* trailing odd byte of this buflet */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				ASSERT(odd == 1);
				/*
				 * -fbounds-safety: Not written as `*dbaddr++ = *sbaddr++`
				 * to avoid compiler bug (rdar://98749526). This
				 * bug is only fixed when using `bound-checks-new-checks`.
				 */
				*dbaddr = *sbaddr++;
				dbaddr++;
				len -= 1;
				sblen -= 1;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}
		/* either this buflet is drained, or the request is complete */
		ASSERT(sblen == 0 || len == 0);
	}

	/* report the final odd/even phase back to the caller */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
376
377 /*
378 * NOTE: Caller of this function is responsible to adjust the length and offset
379 * of the first buflet of the destination packet if (doff != 0),
380 * i.e. additional data is being prependend to the packet.
381 * It should also finalize the packet.
382 * To simplify & optimize the routine, we have also assumed that soff & doff
383 * will lie within the first buffer, which is true for the current use cases
384 * where, doff is the offset of the checksum field in the TCP/IP header and
385 * soff is the L3 offset.
386 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
387 * caller is responsible for further reducing it to 16-bit if needed,
388 * as well as to perform the final 1's complement on it.
389 */
390 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)391 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
392 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
393 {
394 uint8_t odd = 0;
395 uint32_t sum = 0, partial;
396 boolean_t needs_swap, started_on_odd = FALSE;
397 uint8_t *sbaddr = NULL, *dbaddr = NULL;
398 uint16_t sbcnt, dbcnt;
399 uint32_t clen, dlen0, sboff, sblen, dlim;
400 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
401 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
402 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
403
404 ASSERT(csum_partial != NULL || !do_csum);
405 sbcnt = __packet_get_buflet_count(sph);
406 dbcnt = __packet_get_buflet_count(dph);
407
408 while (len != 0) {
409 ASSERT(sbaddr == NULL || dbaddr == NULL);
410 if (sbaddr == NULL) {
411 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
412 if (__improbable(sbuf == NULL)) {
413 break;
414 }
415 sbufp = sbuf;
416 sblen = __buflet_get_data_length(sbuf);
417 sboff = __buflet_get_data_offset(sbuf);
418 ASSERT(soff >= sboff);
419 ASSERT(sboff + sblen > soff);
420 sblen -= (soff - sboff);
421 sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
422 soff = 0;
423 }
424
425 if (dbaddr == NULL) {
426 if (dbufp != NULL) {
427 __buflet_set_data_length(dbufp, dlen0);
428 }
429
430 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
431 if (__improbable(dbuf == NULL)) {
432 break;
433 }
434 dbufp = dbuf;
435 dlim = __buflet_get_data_limit(dbuf);
436 ASSERT(dlim > doff);
437 dlim -= doff;
438 if (doff != 0) {
439 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
440 }
441 dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
442 dlen0 = dlim;
443 doff = 0;
444 }
445
446 clen = MIN(len, sblen);
447 clen = MIN(clen, dlim);
448
449 if (__probable(do_csum)) {
450 partial = 0;
451 if (__improbable((uintptr_t)sbaddr & 1)) {
452 /* Align on word boundary */
453 started_on_odd = !started_on_odd;
454 #if BYTE_ORDER == LITTLE_ENDIAN
455 partial = (uint8_t)*sbaddr << 8;
456 #else /* BYTE_ORDER != LITTLE_ENDIAN */
457 partial = (uint8_t)*sbaddr;
458 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
459 *dbaddr++ = *sbaddr++;
460 clen -= 1;
461 dlim -= 1;
462 len -= 1;
463 }
464 needs_swap = started_on_odd;
465
466 odd = clen & 1u;
467 clen -= odd;
468
469 if (clen != 0) {
470 partial = __packet_copy_and_sum(sbaddr, dbaddr,
471 clen, partial);
472 }
473
474 if (__improbable(partial & 0xc0000000)) {
475 if (needs_swap) {
476 partial = (partial << 8) +
477 (partial >> 24);
478 }
479 sum += (partial >> 16);
480 sum += (partial & 0xffff);
481 partial = 0;
482 }
483 } else {
484 _pkt_copy(sbaddr, dbaddr, clen);
485 }
486 sbaddr += clen;
487 dbaddr += clen;
488
489 if (__probable(do_csum)) {
490 if (odd != 0) {
491 #if BYTE_ORDER == LITTLE_ENDIAN
492 partial += (uint8_t)*sbaddr;
493 #else /* BYTE_ORDER != LITTLE_ENDIAN */
494 partial += (uint8_t)*sbaddr << 8;
495 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
496 *dbaddr++ = *sbaddr++;
497 started_on_odd = !started_on_odd;
498 }
499
500 if (needs_swap) {
501 partial = (partial << 8) + (partial >> 24);
502 }
503 sum += (partial >> 16) + (partial & 0xffff);
504 /*
505 * Reduce sum to allow potential byte swap
506 * in the next iteration without carry.
507 */
508 sum = (sum >> 16) + (sum & 0xffff);
509 }
510
511 sblen -= clen + odd;
512 dlim -= clen + odd;
513 len -= clen + odd;
514
515 if (sblen == 0) {
516 sbaddr = NULL;
517 }
518
519 if (dlim == 0) {
520 dbaddr = NULL;
521 }
522 }
523
524 if (__probable(dbuf != NULL)) {
525 __buflet_set_data_length(dbuf, (dlen0 - dlim));
526 }
527 if (__probable(do_csum)) {
528 /* Final fold (reduce 32-bit to 16-bit) */
529 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
530 sum = (sum >> 16) + (sum & 0xffff);
531 *csum_partial = (uint32_t)sum;
532 }
533 return len == 0;
534 }
535
536 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)537 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
538 {
539 uint8_t odd = 0;
540 uint32_t sum = 0, partial;
541 boolean_t needs_swap, started_on_odd = FALSE;
542 uint8_t *sbaddr = NULL;
543 uint16_t sbcnt;
544 uint32_t clen, sblen, sboff;
545 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
546 kern_buflet_t sbuf = NULL, sbufp = NULL;
547
548 sbcnt = __packet_get_buflet_count(sph);
549
550 /* fastpath (single buflet, even aligned, even length) */
551 if (sbcnt == 1 && len != 0) {
552 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
553 ASSERT(sbuf != NULL);
554 sblen = __buflet_get_data_length(sbuf);
555 sboff = __buflet_get_data_offset(sbuf);
556 ASSERT(soff >= sboff);
557 ASSERT(sboff + sblen > soff);
558 sblen -= (soff - sboff);
559 sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
560
561 clen = MIN(len, sblen);
562
563 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
564 sum = __packet_cksum(sbaddr, clen, 0);
565 return __packet_fold_sum(sum);
566 }
567
568 sbaddr = NULL;
569 sbuf = sbufp = NULL;
570 }
571
572 /* slowpath */
573 while (len != 0) {
574 ASSERT(sbaddr == NULL);
575 if (sbaddr == NULL) {
576 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
577 if (__improbable(sbuf == NULL)) {
578 break;
579 }
580 sbufp = sbuf;
581 sblen = __buflet_get_data_length(sbuf);
582 sboff = __buflet_get_data_offset(sbuf);
583 ASSERT(soff >= sboff);
584 ASSERT(sboff + sblen > soff);
585 sblen -= (soff - sboff);
586 sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
587 soff = 0;
588 }
589
590 clen = MIN(len, sblen);
591
592 partial = 0;
593 if (__improbable((uintptr_t)sbaddr & 1)) {
594 /* Align on word boundary */
595 started_on_odd = !started_on_odd;
596 #if BYTE_ORDER == LITTLE_ENDIAN
597 partial = (uint8_t)*sbaddr << 8;
598 #else /* BYTE_ORDER != LITTLE_ENDIAN */
599 partial = (uint8_t)*sbaddr;
600 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
601 clen -= 1;
602 len -= 1;
603 }
604 needs_swap = started_on_odd;
605
606 odd = clen & 1u;
607 clen -= odd;
608
609 if (clen != 0) {
610 partial = __packet_cksum(sbaddr,
611 clen, partial);
612 }
613
614 if (__improbable(partial & 0xc0000000)) {
615 if (needs_swap) {
616 partial = (partial << 8) +
617 (partial >> 24);
618 }
619 sum += (partial >> 16);
620 sum += (partial & 0xffff);
621 partial = 0;
622 }
623 sbaddr += clen;
624
625 if (odd != 0) {
626 #if BYTE_ORDER == LITTLE_ENDIAN
627 partial += (uint8_t)*sbaddr;
628 #else /* BYTE_ORDER != LITTLE_ENDIAN */
629 partial += (uint8_t)*sbaddr << 8;
630 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
631 started_on_odd = !started_on_odd;
632 }
633
634 if (needs_swap) {
635 partial = (partial << 8) + (partial >> 24);
636 }
637 sum += (partial >> 16) + (partial & 0xffff);
638 /*
639 * Reduce sum to allow potential byte swap
640 * in the next iteration without carry.
641 */
642 sum = (sum >> 16) + (sum & 0xffff);
643
644 sblen -= clen + odd;
645 len -= clen + odd;
646
647 if (sblen == 0) {
648 sbaddr = NULL;
649 }
650 }
651
652 /* Final fold (reduce 32-bit to 16-bit) */
653 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
654 sum = (sum >> 16) + (sum & 0xffff);
655 return (uint32_t)sum;
656 }
657
658
659 /*
660 * This is a multi-buflet variant of pkt_copy_from_pkt().
661 *
662 * start/stuff is relative to soff, within [0, len], such that
663 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
664 */
665 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)666 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
667 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
668 const uint32_t len, const boolean_t copysum, const uint16_t start,
669 const uint16_t stuff, const boolean_t invert)
670 {
671 boolean_t rc;
672 uint32_t partial;
673 uint16_t csum = 0;
674 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
675 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
676 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
677
678 VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
679 __packet_get_buflet_count(dph)));
680
681 switch (t) {
682 case NR_RX:
683 dpkt->pkt_csum_flags = 0;
684 if (__probable(do_sum)) {
685 /*
686 * copy the portion up to the point where we need to
687 * start the checksum, and copy the remainder,
688 * checksumming as we go.
689 */
690 if (__probable(start != 0)) {
691 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
692 start, NULL, FALSE);
693 ASSERT(rc);
694 }
695 _pkt_copypkt_sum(sph, (soff + start), dph,
696 (doff + start), (len - start), &partial, TRUE);
697 csum = __packet_fold_sum(partial);
698 __packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
699 start, csum, FALSE);
700 METADATA_ADJUST_LEN(dpkt, start, doff);
701 } else {
702 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
703 FALSE);
704 ASSERT(rc);
705 dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
706 dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
707 dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
708 }
709 break;
710
711 case NR_TX:
712 if (copysum) {
713 uint8_t *baddr;
714 /*
715 * copy the portion up to the point where we need to
716 * start the checksum, and copy the remainder,
717 * checksumming as we go.
718 */
719 if (__probable(start != 0)) {
720 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
721 start, NULL, FALSE);
722 ASSERT(rc);
723 }
724 rc = _pkt_copypkt_sum(sph, (soff + start), dph,
725 (doff + start), (len - start), &partial, TRUE);
726 ASSERT(rc);
727 csum = __packet_fold_sum_final(partial);
728
729 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
730 if (csum == 0 && invert) {
731 csum = 0xffff;
732 }
733
734 /*
735 * Insert checksum into packet.
736 * Here we assume that checksum will be in the
737 * first buffer.
738 */
739 ASSERT((stuff + doff + sizeof(csum)) <=
740 PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
741 ASSERT(stuff <= (len - sizeof(csum)));
742
743 /* get first buflet buffer address from packet */
744 MD_BUFLET_ADDR_ABS(dpkt, baddr);
745 ASSERT(baddr != NULL);
746 baddr += doff;
747 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
748 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
749 } else {
750 bcopy((void *)&csum, baddr + stuff,
751 sizeof(csum));
752 }
753 METADATA_ADJUST_LEN(dpkt, start, doff);
754 } else {
755 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
756 FALSE);
757 ASSERT(rc);
758 }
759 dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
760 (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
761 dpkt->pkt_csum_tx_start_off = 0;
762 dpkt->pkt_csum_tx_stuff_off = 0;
763
764 #if COPY_LOG
765 SK_DF(SK_VERB_COPY | SK_VERB_TX,
766 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
767 sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
768 len, (copysum ? (len - start) : 0), csum, start,
769 dpkt->pkt_csum_flags);
770 #endif
771 break;
772
773 default:
774 VERIFY(0);
775 /* NOTREACHED */
776 __builtin_unreachable();
777 }
778 }
779
780 static inline uint32_t
_convert_mbuf_csum_flags(uint32_t mbuf_flags)781 _convert_mbuf_csum_flags(uint32_t mbuf_flags)
782 {
783 uint32_t pkt_flags = 0;
784
785 if (mbuf_flags & CSUM_TCP) {
786 pkt_flags |= PACKET_CSUM_TCP;
787 }
788 if (mbuf_flags & CSUM_TCPIPV6) {
789 pkt_flags |= PACKET_CSUM_TCPIPV6;
790 }
791 if (mbuf_flags & CSUM_UDP) {
792 pkt_flags |= PACKET_CSUM_UDP;
793 }
794 if (mbuf_flags & CSUM_UDPIPV6) {
795 pkt_flags |= PACKET_CSUM_UDPIPV6;
796 }
797 if (mbuf_flags & CSUM_IP) {
798 pkt_flags |= PACKET_CSUM_IP;
799 }
800 if (mbuf_flags & CSUM_ZERO_INVERT) {
801 pkt_flags |= PACKET_CSUM_ZERO_INVERT;
802 }
803
804 return pkt_flags;
805 }
806
807 /*
808 * This routine is used for copying an mbuf which originated in the host
809 * stack destined to a native skywalk interface (NR_TX), as well as for
810 * mbufs originating on compat network interfaces (NR_RX).
811 *
812 * start/stuff is relative to moff, within [0, len], such that
813 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
814 */
815 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)816 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
817 struct mbuf *m, const uint16_t moff, const uint32_t len,
818 const boolean_t copysum, const uint16_t start)
819 {
820 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
821 struct m_tag *ts_tag = NULL;
822 uint32_t partial;
823 uint16_t csum = 0;
824 uint16_t vlan = 0;
825 uint8_t *baddr;
826
827 static_assert(sizeof(csum) == sizeof(uint16_t));
828
829 /* get buffer address from packet */
830 MD_BUFLET_ADDR_ABS(pkt, baddr);
831 ASSERT(baddr != NULL);
832 baddr += poff;
833 VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
834
835 switch (t) {
836 case NR_RX:
837 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
838 pkt->pkt_csum_rx_start_off = 0;
839 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
840 pkt->pkt_svc_class = m_get_service_class(m);
841 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
842 != CSUM_RX_FULL_FLAGS) && copysum)) {
843 /*
844 * Use m_copydata() to copy the portion up to the
845 * point where we need to start the checksum, and
846 * copy the remainder, checksumming as we go.
847 */
848 if (start != 0) {
849 m_copydata(m, moff, start, baddr);
850 }
851 partial = m_copydata_sum(m, start, (len - start),
852 (baddr + start), 0, NULL);
853 csum = __packet_fold_sum(partial);
854
855 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
856 start, csum, FALSE);
857 } else {
858 m_copydata(m, moff, len, baddr);
859 }
860
861 if (mbuf_get_vlan_tag(m, &vlan) == 0) {
862 __packet_set_vlan_tag(ph, vlan);
863 }
864
865 #if COPY_LOG
866 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
867 "RX len %u, copy+sum %u (csum 0x%04x), start %u",
868 len, (copysum ? (len - start) : 0), csum, start);
869 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
870 " mbuf %p csumf/rxstart/rxval 0x%x/%u/0x%04x",
871 SK_KVA(m), m->m_pkthdr.csum_flags,
872 (uint32_t)m->m_pkthdr.csum_rx_start,
873 (uint32_t)m->m_pkthdr.csum_rx_val);
874 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
875 " pkt %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
876 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
877 (uint32_t)pkt->pkt_csum_rx_start_off,
878 (uint32_t)pkt->pkt_csum_rx_value);
879 #endif
880 break;
881
882 case NR_TX:
883 if (copysum) {
884 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
885 /*
886 * Use m_copydata() to copy the portion up to the
887 * point where we need to start the checksum, and
888 * copy the remainder, checksumming as we go.
889 */
890 if (start != 0) {
891 m_copydata(m, moff, start, baddr);
892 }
893 partial = m_copydata_sum(m, start, (len - start),
894 (baddr + start), 0, NULL);
895 csum = __packet_fold_sum_final(partial);
896
897 /*
898 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
899 * ideally we'd only test for CSUM_ZERO_INVERT
900 * here, but catch cases where the originator
901 * did not set it for UDP.
902 */
903 if (csum == 0 && (m->m_pkthdr.csum_flags &
904 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
905 csum = 0xffff;
906 }
907
908 /* Insert checksum into packet */
909 ASSERT(stuff <= (len - sizeof(csum)));
910 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
911 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
912 } else {
913 bcopy((void *)&csum, baddr + stuff,
914 sizeof(csum));
915 }
916 } else {
917 m_copydata(m, moff, len, baddr);
918 }
919 pkt->pkt_csum_flags = 0;
920 pkt->pkt_csum_tx_start_off = 0;
921 pkt->pkt_csum_tx_stuff_off = 0;
922
923 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
924 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
925 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
926 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
927 }
928 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
929 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
930 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
931 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
932 }
933 if (!copysum) {
934 pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
935 }
936
937 /* translate mbuf metadata */
938 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
939 pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
940 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
941 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
942 switch (m->m_pkthdr.pkt_proto) {
943 case IPPROTO_QUIC:
944 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
945 pkt->pkt_transport_protocol = IPPROTO_QUIC;
946 break;
947
948 default:
949 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
950 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
951 break;
952 }
953 (void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
954 pkt->pkt_svc_class = m_get_service_class(m);
955 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
956 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
957 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
958 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
959 }
960 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_LPW) != 0) {
961 pkt->pkt_pflags |= __PKT_F_LPW;
962 }
963 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
964 pkt->pkt_pflags |= PKT_F_L4S;
965 }
966 necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
967 pkt->pkt_policy_id =
968 (uint32_t)necp_get_policy_id_from_packet(m);
969 pkt->pkt_skip_policy_id =
970 (uint32_t)necp_get_skip_policy_id_from_packet(m);
971
972 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
973 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
974 __packet_set_tx_completion_data(ph,
975 m->m_pkthdr.drv_tx_compl_arg,
976 m->m_pkthdr.drv_tx_compl_data);
977 }
978 pkt->pkt_tx_compl_context =
979 m->m_pkthdr.pkt_compl_context;
980 pkt->pkt_tx_compl_callbacks =
981 m->m_pkthdr.pkt_compl_callbacks;
982 /*
983 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
984 * mbuf can no longer trigger a completion callback.
985 * callback will be invoked when the kernel packet is
986 * completed.
987 */
988 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
989
990 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
991 }
992
993 ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
994 if (ts_tag != NULL) {
995 __packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
996 }
997
998 if (mbuf_get_vlan_tag(m, &vlan) == 0) {
999 __packet_set_vlan_tag(ph, vlan);
1000 }
1001
1002 #if COPY_LOG
1003 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1004 "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1005 len, (copysum ? (len - start) : 0), csum, start);
1006 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1007 " mbuf %p csumf/txstart/txstuff 0x%x/%u/%u",
1008 SK_KVA(m), m->m_pkthdr.csum_flags,
1009 (uint32_t)m->m_pkthdr.csum_tx_start,
1010 (uint32_t)m->m_pkthdr.csum_tx_stuff);
1011 #endif
1012 break;
1013
1014 default:
1015 VERIFY(0);
1016 /* NOTREACHED */
1017 __builtin_unreachable();
1018 }
1019 METADATA_ADJUST_LEN(pkt, len, poff);
1020
1021 if (m->m_flags & M_BCAST) {
1022 __packet_set_link_broadcast(ph);
1023 } else if (m->m_flags & M_MCAST) {
1024 __packet_set_link_multicast(ph);
1025 }
1026
1027 #if COPY_LOG
1028 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1029 (t == NR_RX) ? "RX" : "TX", sk_dump("buf", baddr, len, 128));
1030 #endif
1031 }
1032
1033 /*
1034 * Like m_copydata_sum(), but works on a destination kernel packet.
1035 */
1036 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)1037 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1038 uint32_t len, boolean_t do_cscum)
1039 {
1040 boolean_t needs_swap, started_on_odd = FALSE;
1041 int off0 = soff;
1042 uint32_t len0 = len;
1043 struct mbuf *m0 = m;
1044 uint32_t sum = 0, partial;
1045 unsigned count0, count, odd, mlen_copied;
1046 uint8_t *sbaddr = NULL, *dbaddr = NULL;
1047 uint16_t dbcnt = __packet_get_buflet_count(dph);
1048 uint32_t dlim, dlen0;
1049 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1050 kern_buflet_t dbuf = NULL, dbufp = NULL;
1051
1052 while (soff > 0) {
1053 if (__improbable(m == NULL)) {
1054 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1055 __func__, m0, off0, len0);
1056 /* NOTREACHED */
1057 __builtin_unreachable();
1058 }
1059 if (soff < m->m_len) {
1060 break;
1061 }
1062 soff -= m->m_len;
1063 m = m->m_next;
1064 }
1065
1066 if (__improbable(m == NULL)) {
1067 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1068 __func__, m0, off0, len0);
1069 /* NOTREACHED */
1070 __builtin_unreachable();
1071 }
1072
1073 sbaddr = mtod(m, uint8_t *) + soff;
1074 count = m->m_len - soff;
1075 mlen_copied = 0;
1076
1077 while (len != 0) {
1078 ASSERT(sbaddr == NULL || dbaddr == NULL);
1079 if (sbaddr == NULL) {
1080 soff = 0;
1081 m = m->m_next;
1082 if (__improbable(m == NULL)) {
1083 panic("%s: invalid mbuf chain %p [off %d, "
1084 "len %d]", __func__, m0, off0, len0);
1085 /* NOTREACHED */
1086 __builtin_unreachable();
1087 }
1088 sbaddr = mtod(m, uint8_t *);
1089 count = m->m_len;
1090 mlen_copied = 0;
1091 }
1092
1093 if (__improbable(count == 0)) {
1094 sbaddr = NULL;
1095 continue;
1096 }
1097
1098 if (dbaddr == NULL) {
1099 if (dbufp != NULL) {
1100 __buflet_set_data_length(dbufp, dlen0);
1101 }
1102
1103 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1104 if (__improbable(dbuf == NULL)) {
1105 panic("%s: mbuf too large %p [off %d, "
1106 "len %d]", __func__, m0, off0, len0);
1107 /* NOTREACHED */
1108 __builtin_unreachable();
1109 }
1110 dbufp = dbuf;
1111 dlim = __buflet_get_data_limit(dbuf) - doff;
1112 dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
1113 dlen0 = dlim;
1114 doff = 0;
1115 }
1116
1117 count = MIN(count, (unsigned)len);
1118 count0 = count = MIN(count, dlim);
1119
1120 if (!do_cscum) {
1121 _pkt_copy(sbaddr, dbaddr, count);
1122 sbaddr += count;
1123 dbaddr += count;
1124 goto skip_csum;
1125 }
1126
1127 partial = 0;
1128 if ((uintptr_t)sbaddr & 1) {
1129 /* Align on word boundary */
1130 started_on_odd = !started_on_odd;
1131 #if BYTE_ORDER == LITTLE_ENDIAN
1132 partial = *sbaddr << 8;
1133 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1134 partial = *sbaddr;
1135 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1136 *dbaddr++ = *sbaddr++;
1137 count -= 1;
1138 }
1139
1140 needs_swap = started_on_odd;
1141 odd = count & 1u;
1142 count -= odd;
1143
1144 if (count) {
1145 partial = __packet_copy_and_sum(sbaddr,
1146 dbaddr, count, partial);
1147 sbaddr += count;
1148 dbaddr += count;
1149 if (__improbable(partial & 0xc0000000)) {
1150 if (needs_swap) {
1151 partial = (partial << 8) +
1152 (partial >> 24);
1153 }
1154 sum += (partial >> 16);
1155 sum += (partial & 0xffff);
1156 partial = 0;
1157 }
1158 }
1159
1160 if (odd) {
1161 #if BYTE_ORDER == LITTLE_ENDIAN
1162 partial += *sbaddr;
1163 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1164 partial += *sbaddr << 8;
1165 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1166 *dbaddr++ = *sbaddr++;
1167 started_on_odd = !started_on_odd;
1168 }
1169
1170 if (needs_swap) {
1171 partial = (partial << 8) + (partial >> 24);
1172 }
1173 sum += (partial >> 16) + (partial & 0xffff);
1174 /*
1175 * Reduce sum to allow potential byte swap
1176 * in the next iteration without carry.
1177 */
1178 sum = (sum >> 16) + (sum & 0xffff);
1179
1180 skip_csum:
1181 dlim -= count0;
1182 len -= count0;
1183 mlen_copied += count0;
1184
1185 if (dlim == 0) {
1186 dbaddr = NULL;
1187 }
1188
1189 count = m->m_len - soff - mlen_copied;
1190 if (count == 0) {
1191 sbaddr = NULL;
1192 }
1193 }
1194
1195 ASSERT(len == 0);
1196 ASSERT(dbuf != NULL);
1197 __buflet_set_data_length(dbuf, (dlen0 - dlim));
1198
1199 if (!do_cscum) {
1200 return 0;
1201 }
1202
1203 /* Final fold (reduce 32-bit to 16-bit) */
1204 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1205 sum = (sum >> 16) + (sum & 0xffff);
1206 return sum;
1207 }
1208
1209 /*
1210 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1211 *
1212 * start/stuff is relative to moff, within [0, len], such that
1213 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1214 */
1215 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1216 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1217 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1218 const uint32_t len, const boolean_t copysum, const uint16_t start)
1219 {
1220 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1221 struct m_tag *ts_tag = NULL;
1222 uint32_t partial;
1223 uint16_t csum = 0;
1224 uint16_t vlan = 0;
1225 uint8_t *baddr;
1226
1227 static_assert(sizeof(csum) == sizeof(uint16_t));
1228
1229 /* get buffer address from packet */
1230 MD_BUFLET_ADDR_ABS(pkt, baddr);
1231 ASSERT(baddr != NULL);
1232 baddr += poff;
1233 VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1234 __packet_get_buflet_count(ph)));
1235
1236 switch (t) {
1237 case NR_RX:
1238 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1239 pkt->pkt_csum_rx_start_off = 0;
1240 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1241 pkt->pkt_svc_class = m_get_service_class(m);
1242 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1243 != CSUM_RX_FULL_FLAGS) && copysum)) {
1244 /*
1245 * Use m_copydata() to copy the portion up to the
1246 * point where we need to start the checksum, and
1247 * copy the remainder, checksumming as we go.
1248 */
1249 if (start != 0) {
1250 m_copydata(m, moff, start, baddr);
1251 }
1252 partial = m_copypkt_sum(m, start, ph, (poff + start),
1253 (len - start), TRUE);
1254 csum = __packet_fold_sum(partial);
1255 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1256 start, csum, FALSE);
1257 METADATA_ADJUST_LEN(pkt, start, poff);
1258 } else {
1259 (void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1260 }
1261
1262 if (mbuf_get_vlan_tag(m, &vlan) == 0) {
1263 __packet_set_vlan_tag(ph, vlan);
1264 }
1265
1266 #if COPY_LOG
1267 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1268 "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1269 len, (copysum ? (len - start) : 0), csum, start);
1270 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1271 " mbuf %p csumf/rxstart/rxval 0x%x/%u/0x%04x",
1272 SK_KVA(m), m->m_pkthdr.csum_flags,
1273 (uint32_t)m->m_pkthdr.csum_rx_start,
1274 (uint32_t)m->m_pkthdr.csum_rx_val);
1275 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1276 " pkt %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1277 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1278 (uint32_t)pkt->pkt_csum_rx_start_off,
1279 (uint32_t)pkt->pkt_csum_rx_value);
1280 #endif
1281 break;
1282
1283 case NR_TX:
1284 if (copysum) {
1285 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1286 /*
1287 * Use m_copydata() to copy the portion up to the
1288 * point where we need to start the checksum, and
1289 * copy the remainder, checksumming as we go.
1290 */
1291 if (start != 0) {
1292 m_copydata(m, moff, start, baddr);
1293 }
1294 partial = m_copypkt_sum(m, start, ph, (poff + start),
1295 (len - start), TRUE);
1296 csum = __packet_fold_sum_final(partial);
1297
1298 /*
1299 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1300 * ideally we'd only test for CSUM_ZERO_INVERT
1301 * here, but catch cases where the originator
1302 * did not set it for UDP.
1303 */
1304 if (csum == 0 && (m->m_pkthdr.csum_flags &
1305 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1306 csum = 0xffff;
1307 }
1308
1309 /* Insert checksum into packet */
1310 ASSERT(stuff <= (len - sizeof(csum)));
1311 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1312 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1313 } else {
1314 bcopy((void *)&csum, baddr + stuff,
1315 sizeof(csum));
1316 }
1317 METADATA_ADJUST_LEN(pkt, start, poff);
1318 } else {
1319 m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1320 }
1321 pkt->pkt_csum_flags = 0;
1322 pkt->pkt_csum_tx_start_off = 0;
1323 pkt->pkt_csum_tx_stuff_off = 0;
1324
1325 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1326 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
1327 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1328 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1329 }
1330 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1331 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
1332 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1333 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1334 }
1335 if (!copysum) {
1336 pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
1337 }
1338
1339 /* translate mbuf metadata */
1340 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1341 pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1342 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1343 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1344 switch (m->m_pkthdr.pkt_proto) {
1345 case IPPROTO_QUIC:
1346 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1347 pkt->pkt_transport_protocol = IPPROTO_QUIC;
1348 break;
1349
1350 default:
1351 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1352 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1353 break;
1354 }
1355 (void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1356 pkt->pkt_svc_class = m_get_service_class(m);
1357 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1358 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1359 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1360 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1361 }
1362 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_LPW) != 0) {
1363 pkt->pkt_pflags |= __PKT_F_LPW;
1364 }
1365 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1366 pkt->pkt_pflags |= PKT_F_L4S;
1367 }
1368 necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1369 pkt->pkt_policy_id =
1370 (uint32_t)necp_get_policy_id_from_packet(m);
1371 pkt->pkt_skip_policy_id =
1372 (uint32_t)necp_get_skip_policy_id_from_packet(m);
1373
1374 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1375 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1376 __packet_set_tx_completion_data(ph,
1377 m->m_pkthdr.drv_tx_compl_arg,
1378 m->m_pkthdr.drv_tx_compl_data);
1379 }
1380 pkt->pkt_tx_compl_context =
1381 m->m_pkthdr.pkt_compl_context;
1382 pkt->pkt_tx_compl_callbacks =
1383 m->m_pkthdr.pkt_compl_callbacks;
1384 /*
1385 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1386 * mbuf can no longer trigger a completion callback.
1387 * callback will be invoked when the kernel packet is
1388 * completed.
1389 */
1390 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1391
1392 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1393 }
1394
1395 ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
1396 if (ts_tag != NULL) {
1397 __packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
1398 }
1399
1400 if (mbuf_get_vlan_tag(m, &vlan) == 0) {
1401 __packet_set_vlan_tag(ph, vlan);
1402 }
1403
1404 #if COPY_LOG
1405 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1406 "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1407 len, (copysum ? (len - start) : 0), csum, start);
1408 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1409 " mbuf %p csumf/txstart/txstuff 0x%x/%u/%u",
1410 SK_KVA(m), m->m_pkthdr.csum_flags,
1411 (uint32_t)m->m_pkthdr.csum_tx_start,
1412 (uint32_t)m->m_pkthdr.csum_tx_stuff);
1413 #endif
1414 break;
1415
1416 default:
1417 VERIFY(0);
1418 /* NOTREACHED */
1419 __builtin_unreachable();
1420 }
1421
1422 if (m->m_flags & M_BCAST) {
1423 __packet_set_link_broadcast(ph);
1424 } else if (m->m_flags & M_MCAST) {
1425 __packet_set_link_multicast(ph);
1426 }
1427
1428 #if COPY_LOG
1429 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1430 (t == NR_RX) ? "RX" : "TX", sk_dump("buf", baddr, len, 128));
1431 #endif
1432 }
1433
1434 static inline uint32_t
_convert_pkt_csum_flags(uint32_t pkt_flags)1435 _convert_pkt_csum_flags(uint32_t pkt_flags)
1436 {
1437 uint32_t mbuf_flags = 0;
1438 if (pkt_flags & PACKET_CSUM_TCP) {
1439 mbuf_flags |= CSUM_TCP;
1440 }
1441 if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1442 mbuf_flags |= CSUM_TCPIPV6;
1443 }
1444 if (pkt_flags & PACKET_CSUM_UDP) {
1445 mbuf_flags |= CSUM_UDP;
1446 }
1447 if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1448 mbuf_flags |= CSUM_UDPIPV6;
1449 }
1450 if (pkt_flags & PACKET_CSUM_IP) {
1451 mbuf_flags |= CSUM_IP;
1452 }
1453 if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1454 mbuf_flags |= CSUM_ZERO_INVERT;
1455 }
1456 if (pkt_flags & PACKET_CSUM_TSO_IPV4) {
1457 mbuf_flags |= CSUM_TSO_IPV4;
1458 }
1459 if (pkt_flags & PACKET_CSUM_TSO_IPV6) {
1460 mbuf_flags |= CSUM_TSO_IPV6;
1461 }
1462
1463 return mbuf_flags;
1464 }
1465
1466 /*
1467 * This routine is used for copying from a packet originating from a native
1468 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1469 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1470 *
1471 * We do adjust the length to reflect the total data span.
1472 *
1473 * This routine supports copying into an mbuf chain for RX but not TX.
1474 *
1475 * start/stuff is relative to poff, within [0, len], such that
1476 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1477 */
1478 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1479 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1480 struct mbuf *m, const uint16_t moff, const uint32_t len,
1481 const boolean_t copysum, const uint16_t start)
1482 {
1483 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1484 struct mbuf *curr_m;
1485 uint32_t partial = 0;
1486 uint32_t remaining_len = len, copied_len = 0;
1487 uint16_t csum = 0;
1488 uint16_t vlan = 0;
1489 uint8_t *baddr;
1490 uint8_t *dp;
1491 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1492
1493 ASSERT(len >= start);
1494 static_assert(sizeof(csum) == sizeof(uint16_t));
1495
1496 /* get buffer address from packet */
1497 MD_BUFLET_ADDR_ABS(pkt, baddr);
1498 ASSERT(baddr != NULL);
1499 baddr += poff;
1500 VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1501
1502 ASSERT((m->m_flags & M_PKTHDR));
1503 m->m_data += moff;
1504
1505 switch (t) {
1506 case NR_RX:
1507 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1508
1509 /*
1510 * Use pkt_copy() to copy the portion up to the
1511 * point where we need to start the checksum, and
1512 * copy the remainder, checksumming as we go.
1513 */
1514 if (__probable(do_sum && start != 0)) {
1515 ASSERT(M_TRAILINGSPACE(m) >= start);
1516 ASSERT(m->m_len == 0);
1517 dp = (uint8_t *)m_mtod_current(m);
1518 _pkt_copy(baddr, dp, start);
1519 remaining_len -= start;
1520 copied_len += start;
1521 m->m_len += start;
1522 m->m_pkthdr.len += start;
1523 }
1524 curr_m = m;
1525 while (curr_m != NULL && remaining_len != 0) {
1526 uint32_t tmp_len = MIN(remaining_len,
1527 (uint32_t)M_TRAILINGSPACE(curr_m));
1528 dp = (uint8_t *)m_mtod_end(curr_m);
1529 if (__probable(do_sum)) {
1530 partial = __packet_copy_and_sum((baddr + copied_len),
1531 dp, tmp_len, partial);
1532 } else {
1533 _pkt_copy((baddr + copied_len), dp, tmp_len);
1534 }
1535
1536 curr_m->m_len += tmp_len;
1537 m->m_pkthdr.len += tmp_len;
1538 copied_len += tmp_len;
1539 remaining_len -= tmp_len;
1540 curr_m = curr_m->m_next;
1541 }
1542 ASSERT(remaining_len == 0);
1543
1544 if (__probable(do_sum)) {
1545 csum = __packet_fold_sum(partial);
1546
1547 m->m_pkthdr.csum_flags |=
1548 (CSUM_DATA_VALID | CSUM_PARTIAL);
1549 m->m_pkthdr.csum_rx_start = start;
1550 m->m_pkthdr.csum_rx_val = csum;
1551 } else {
1552 m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1553 m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1554 static_assert(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1555 m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1556 if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1557 m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1558 }
1559 }
1560
1561 /* translate packet metadata */
1562 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1563 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1564
1565 m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1566
1567 if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1568 mbuf_set_vlan_tag(m, vlan);
1569 }
1570
1571 #if COPY_LOG
1572 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1573 "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1574 len, (copysum ? (len - start) : 0), csum, start);
1575 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1576 " mbuf %p moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1577 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1578 (uint32_t)m->m_pkthdr.csum_rx_start,
1579 (uint32_t)m->m_pkthdr.csum_rx_val);
1580 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1581 " pkt %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1582 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1583 (uint32_t)pkt->pkt_csum_rx_start_off,
1584 (uint32_t)pkt->pkt_csum_rx_value);
1585 #endif
1586 break;
1587
1588 case NR_TX:
1589 dp = (uint8_t *)m_mtod_current(m);
1590 ASSERT(m->m_next == NULL);
1591
1592 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1593 (uint32_t)mbuf_maxlen(m));
1594 m->m_len += len;
1595 m->m_pkthdr.len += len;
1596 VERIFY(m->m_len == m->m_pkthdr.len &&
1597 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1598
1599 if (copysum) {
1600 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1601 /*
1602 * Use pkt_copy() to copy the portion up to the
1603 * point where we need to start the checksum, and
1604 * copy the remainder, checksumming as we go.
1605 */
1606 if (__probable(start != 0)) {
1607 _pkt_copy(baddr, dp, start);
1608 }
1609 partial = __packet_copy_and_sum((baddr + start),
1610 (dp + start), (len - start), 0);
1611 csum = __packet_fold_sum_final(partial);
1612
1613 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1614 if (csum == 0 &&
1615 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1616 csum = 0xffff;
1617 }
1618
1619 /* Insert checksum into packet */
1620 ASSERT(stuff <= (len - sizeof(csum)));
1621 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1622 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1623 } else {
1624 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1625 }
1626 } else {
1627 _pkt_copy(baddr, dp, len);
1628 }
1629 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1630 m->m_pkthdr.csum_tx_start = 0;
1631 m->m_pkthdr.csum_tx_stuff = 0;
1632 m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1633
1634 /* translate packet metadata */
1635 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1636 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1637 m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1638 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1639 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1640 m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1641 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1642 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1643 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1644 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1645 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1646 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1647 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1648 }
1649 if ((pkt->pkt_pflags & __PKT_F_LPW) != 0) {
1650 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_LPW;
1651 }
1652 if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1653 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1654 }
1655 if ((pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
1656 struct m_tag *tag = NULL;
1657 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1658 sizeof(uint64_t), M_WAITOK, m);
1659 if (tag != NULL) {
1660 m_tag_prepend(m, tag);
1661 *(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1662 }
1663 }
1664 m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1665 m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1666
1667 if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1668 mbuf_set_vlan_tag(m, vlan);
1669 }
1670
1671 #if COPY_LOG
1672 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1673 "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1674 len, (copysum ? (len - start) : 0), csum, start);
1675 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1676 " pkt %p poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1677 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1678 (uint32_t)pkt->pkt_csum_tx_start_off,
1679 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1680 #endif
1681 break;
1682
1683 default:
1684 VERIFY(0);
1685 /* NOTREACHED */
1686 __builtin_unreachable();
1687 }
1688
1689 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1690 m->m_flags |= M_BCAST;
1691 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1692 m->m_flags |= M_MCAST;
1693 }
1694 #if COPY_LOG
1695 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1696 (t == NR_RX) ? "RX" : "TX",
1697 sk_dump("buf", (uint8_t *)dp, m->m_len, 128));
1698 #endif
1699 }
1700
1701 /*
1702 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1703 * NOTE: poff is the offset within the packet.
1704 *
1705 * This routine supports copying into an mbuf chain for RX but not TX.
1706 *
1707 * start/stuff is relative to poff, within [0, len], such that
1708 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1709 */
1710 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1711 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1712 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1713 const uint32_t len, const boolean_t copysum, const uint16_t start)
1714 {
1715 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1716 struct mbuf *curr_m;
1717 uint32_t partial = 0;
1718 uint32_t remaining_len = len, copied_len = 0;
1719 uint16_t csum = 0;
1720 uint16_t vlan = 0;
1721 uint8_t *baddr;
1722 uint8_t *dp;
1723 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1724
1725 ASSERT(len >= start);
1726 static_assert(sizeof(csum) == sizeof(uint16_t));
1727
1728 /* get buffer address from packet */
1729 MD_BUFLET_ADDR_ABS(pkt, baddr);
1730 ASSERT(baddr != NULL);
1731 baddr += poff;
1732
1733 ASSERT((m->m_flags & M_PKTHDR));
1734 m->m_data += moff;
1735
1736 switch (t) {
1737 case NR_RX:
1738 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1739 if (__probable(do_sum && start != 0)) {
1740 ASSERT(M_TRAILINGSPACE(m) >= start);
1741 ASSERT(m->m_len == 0);
1742 dp = (uint8_t *)m_mtod_current(m);
1743 _pkt_copy(baddr, dp, start);
1744 remaining_len -= start;
1745 copied_len += start;
1746 m->m_len += start;
1747 m->m_pkthdr.len += start;
1748 }
1749 curr_m = m;
1750 while (curr_m != NULL && remaining_len != 0) {
1751 uint32_t tmp_len = MIN(remaining_len,
1752 (uint32_t)M_TRAILINGSPACE(curr_m));
1753 uint16_t soff = poff + (uint16_t)copied_len;
1754 dp = (uint8_t *)m_mtod_end(curr_m);
1755
1756 if (__probable(do_sum)) {
1757 partial = _pkt_copyaddr_sum(ph, soff,
1758 dp, tmp_len, TRUE, partial, NULL);
1759 } else {
1760 pkt_copyaddr_sum(ph, soff,
1761 dp, tmp_len, FALSE, 0, NULL);
1762 }
1763
1764 curr_m->m_len += tmp_len;
1765 m->m_pkthdr.len += tmp_len;
1766 copied_len += tmp_len;
1767 remaining_len -= tmp_len;
1768 curr_m = curr_m->m_next;
1769 }
1770 ASSERT(remaining_len == 0);
1771
1772 if (__probable(do_sum)) {
1773 csum = __packet_fold_sum(partial);
1774
1775 m->m_pkthdr.csum_flags |=
1776 (CSUM_DATA_VALID | CSUM_PARTIAL);
1777 m->m_pkthdr.csum_rx_start = start;
1778 m->m_pkthdr.csum_rx_val = csum;
1779 } else {
1780 m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1781 m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1782 static_assert(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1783 m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1784 if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1785 m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1786 }
1787 }
1788
1789 m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1790 m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1791
1792 /* translate packet metadata */
1793 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1794 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1795
1796 m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1797
1798 if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1799 mbuf_set_vlan_tag(m, vlan);
1800 }
1801
1802 #if COPY_LOG
1803 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1804 "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1805 len, (copysum ? (len - start) : 0), csum, start);
1806 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1807 " mbuf %p moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1808 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1809 (uint32_t)m->m_pkthdr.csum_rx_start,
1810 (uint32_t)m->m_pkthdr.csum_rx_val);
1811 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1812 " pkt %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1813 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1814 (uint32_t)pkt->pkt_csum_rx_start_off,
1815 (uint32_t)pkt->pkt_csum_rx_value);
1816 #endif
1817 break;
1818 case NR_TX:
1819 ASSERT(len <= M16KCLBYTES);
1820 dp = (uint8_t *)m_mtod_current(m);
1821 ASSERT(m->m_next == NULL);
1822 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1823 (uint32_t)mbuf_maxlen(m));
1824 m->m_len += len;
1825 m->m_pkthdr.len += len;
1826 VERIFY(m->m_len == m->m_pkthdr.len &&
1827 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1828 if (copysum) {
1829 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1830 /*
1831 * Use pkt_copy() to copy the portion up to the
1832 * point where we need to start the checksum, and
1833 * copy the remainder, checksumming as we go.
1834 */
1835 if (__probable(start != 0)) {
1836 _pkt_copy(baddr, dp, start);
1837 }
1838 partial = _pkt_copyaddr_sum(ph, (poff + start),
1839 (dp + start), (len - start), TRUE, 0, NULL);
1840 csum = __packet_fold_sum_final(partial);
1841
1842 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1843 if (csum == 0 &&
1844 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1845 csum = 0xffff;
1846 }
1847
1848 /* Insert checksum into packet */
1849 ASSERT(stuff <= (len - sizeof(csum)));
1850 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1851 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1852 } else {
1853 bcopy((void *)&csum, dp + stuff, sizeof(csum));
1854 }
1855 } else {
1856 (void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1857 }
1858 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1859 m->m_pkthdr.csum_tx_start = 0;
1860 m->m_pkthdr.csum_tx_stuff = 0;
1861 m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1862
1863 /* translate packet metadata */
1864 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1865 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1866 m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1867 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1868 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1869 m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1870 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1871 mbuf_set_timestamp(m, pkt->pkt_timestamp,
1872 ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1873 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1874 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1875 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1876 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1877 }
1878 if ((pkt->pkt_pflags & __PKT_F_LPW) != 0) {
1879 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_LPW;
1880 }
1881 if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1882 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1883 }
1884 if ((pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
1885 struct m_tag *tag = NULL;
1886 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1887 sizeof(uint64_t), M_WAITOK, m);
1888 if (tag != NULL) {
1889 m_tag_prepend(m, tag);
1890 *(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1891 }
1892 }
1893
1894 if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1895 mbuf_set_vlan_tag(m, vlan);
1896 }
1897
1898 #if COPY_LOG
1899 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1900 "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1901 len, (copysum ? (len - start) : 0), csum, start);
1902 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1903 " pkt %p poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1904 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1905 (uint32_t)pkt->pkt_csum_tx_start_off,
1906 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1907 #endif
1908 break;
1909
1910 default:
1911 VERIFY(0);
1912 /* NOTREACHED */
1913 __builtin_unreachable();
1914 }
1915
1916 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1917 m->m_flags |= M_BCAST;
1918 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1919 m->m_flags |= M_MCAST;
1920 }
1921 #if COPY_LOG
1922 SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1923 (t == NR_RX) ? "RX" : "TX",
1924 sk_dump("buf", (uint8_t *)dp, m->m_len, 128));
1925 #endif
1926 }
1927
1928 /*
 * Like m_copydata(), but computes 16-bit sum as the data is copied.
 * Caller can provide an initial sum to be folded into the computed
 * sum. The accumulated partial sum (32-bit) is returned to caller;
 * caller is responsible for further reducing it to 16-bit if needed,
 * as well as to perform the final 1's complement on it.
 *
 * "odd_start", if non-NULL, carries byte-parity state across calls so
 * that a checksum split over multiple invocations remains correct: it
 * is read on entry and updated with the ending parity on return.
 */
uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len)vp, uint32_t initial_sum,
    boolean_t *odd_start)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = off, len0 = len;     /* originals, kept for panic messages */
	struct mbuf *m0 = m;
	uint64_t sum, partial;
	unsigned count, odd;
	char *cp = vp;

	if (__improbable(off < 0 || len < 0)) {
		panic("%s: invalid offset %d or len %d", __func__, off, len);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* Skip mbufs lying entirely before the starting offset. */
	while (off > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (off < m->m_len) {
			break;
		}
		off -= m->m_len;
		m = m->m_next;
	}

	/* Resume byte parity from a previous partial run, if provided. */
	if (odd_start) {
		started_on_odd = *odd_start;
	}
	sum = initial_sum;

	for (; len0 > 0; m = m->m_next) {
		uint8_t *datap;

		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		datap = mtod(m, uint8_t *) + off;
		count = m->m_len;

		/* Empty mbuf: nothing to copy or sum (off is 0 here). */
		if (__improbable(count == 0)) {
			continue;
		}

		count = MIN(count - off, (unsigned)len0);
		partial = 0;

		if ((uintptr_t)datap & 1) {
			/*
			 * Align on word boundary: consume one byte,
			 * placing it in the half of the 16-bit word
			 * dictated by host endianness, and flip the
			 * parity for the remainder of this segment.
			 */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *datap << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *datap;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			count -= 1;
			len0 -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;           /* even portion handled in bulk */

		if (count) {
			partial = __packet_copy_and_sum(datap,
			    cp, count, (uint32_t)partial);
			datap += count;
			cp += count;
			len0 -= count;
			/*
			 * If the running partial is close to the top of
			 * its 64-bit range, fold it into "sum" now so
			 * the byte rotation below cannot drop carries.
			 */
			if (__improbable((partial & (3ULL << 62)) != 0)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}

		if (odd) {
			/* Trailing odd byte of this segment. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *datap;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *datap << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			len0 -= 1;
			started_on_odd = !started_on_odd;
		}
		off = 0;        /* the offset applies to the first mbuf only */

		/*
		 * A segment that started at odd parity contributes
		 * its bytes in swapped positions; rotate the partial
		 * by one byte before folding it into the sum.
		 */
		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	if (odd_start) {
		*odd_start = started_on_odd;
	}

	/* Final fold (reduce 64-bit to 32-bit) */
	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */

	/* return 32-bit partial sum to caller */
	return (uint32_t)sum;
}
2059
#if DEBUG || DEVELOPMENT
#define TRAILERS_MAX 16 /* max trailing bytes */
#define TRAILERS_REGEN (64 * 1024) /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX]; /* random trailing bytes (zero until first regen) */
static uint32_t regen = TRAILERS_REGEN; /* counter; starts at threshold so first use regenerates */
2065
2066 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)2067 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
2068 {
2069 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
2070 uint32_t extra;
2071 uint8_t *baddr;
2072
2073 /* get buffer address from packet */
2074 MD_BUFLET_ADDR_ABS(pkt, baddr);
2075 ASSERT(baddr != NULL);
2076 ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
2077
2078 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2079 if (extra == 0 || extra > sizeof(tb) ||
2080 (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
2081 return 0;
2082 }
2083
2084 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2085 if (regen++ == TRAILERS_REGEN) {
2086 read_frandom(&tb[0], sizeof(tb));
2087 regen = 0;
2088 }
2089
2090 bcopy(&tb[0], (baddr + len), extra);
2091
2092 /* recompute partial sum (also to exercise related logic) */
2093 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
2094 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
2095 ((len + extra) - start), 0);
2096 pkt->pkt_csum_rx_start_off = start;
2097
2098 return extra;
2099 }
2100
2101 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)2102 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
2103 {
2104 uint32_t extra;
2105
2106 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2107 if (extra == 0 || extra > sizeof(tb)) {
2108 return 0;
2109 }
2110
2111 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2112 return 0;
2113 }
2114
2115 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2116 if (regen++ == TRAILERS_REGEN) {
2117 read_frandom(&tb[0], sizeof(tb));
2118 regen = 0;
2119 }
2120
2121 /* recompute partial sum (also to exercise related logic) */
2122 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2123 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2124 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2125 m->m_pkthdr.csum_rx_start = start;
2126
2127 return extra;
2128 }
2129 #endif /* DEBUG || DEVELOPMENT */
2130
/*
 * Exported wrapper around _pkt_copypkt_sum(); panics (via VERIFY) if
 * the underlying copy reports failure.  NOTE(review): presumably this
 * copies "len" bytes from sph@soff to dph@doff, accumulating into
 * *partial when do_csum is set — confirm against _pkt_copypkt_sum.
 * The call is kept inside VERIFY() so the stringized expression in
 * the panic message stays intact.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
2137
2138 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * __sized_by (len)dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)2139 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
2140 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
2141 {
2142 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
2143 }
2144
2145 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)2146 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2147 uint16_t len, boolean_t do_cscum)
2148 {
2149 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2150 }
2151
2152 void
pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)2153 pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
2154 {
2155 return _pkt_copy(src, dst, len);
2156 }
2157