xref: /xnu-11215.41.3/bsd/skywalk/packet/packet_copy.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2017-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
33 uint32_t copy_pkt_tx_time = 1;
34 #if (DEVELOPMENT || DEBUG)
35 SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
36     CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
37 int pkt_trailers = 0; /* for testing trailing bytes */
38 SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
39     CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
40 
41 SYSCTL_UINT(_kern_skywalk_packet, OID_AUTO, copy_pkt_tx_time,
42     CTLFLAG_RW | CTLFLAG_LOCKED, &copy_pkt_tx_time, 0,
43     "copy tx time from pkt to mbuf");
44 #endif /* !DEVELOPMENT && !DEBUG */
45 
46 
47 __attribute__((always_inline))
48 static inline void
_pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)49 _pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
50 {
51 	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
52 		switch (len) {
53 		case 20:        /* standard IPv4 header */
54 			sk_copy64_20(src, dst);
55 			return;
56 
57 		case 40:        /* IPv6 header */
58 			sk_copy64_40(src, dst);
59 			return;
60 
61 		default:
62 			if (IS_P2ALIGNED(len, 64)) {
63 				sk_copy64_64x(src, dst, len);
64 				return;
65 			} else if (IS_P2ALIGNED(len, 32)) {
66 				sk_copy64_32x(src, dst, len);
67 				return;
68 			} else if (IS_P2ALIGNED(len, 8)) {
69 				sk_copy64_8x(src, dst, len);
70 				return;
71 			} else if (IS_P2ALIGNED(len, 4)) {
72 				sk_copy64_4x(src, dst, len);
73 				return;
74 			}
75 			break;
76 		}
77 	}
78 	bcopy(src, dst, len);
79 }
80 
81 /*
82  * This routine is used for copying data across two kernel packets.
83  * Can also optionally compute 16-bit partial inet checksum as the
84  * data is copied.
85  * This routine is used by flowswitch while copying packet from vp
86  * adapter pool to packet in native netif pool and vice-a-versa.
87  *
88  * start/stuff is relative to soff, within [0, len], such that
89  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
90  */
91 void
pkt_copy_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)92 pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
93     kern_packet_t sph, const uint16_t soff, const uint32_t len,
94     const boolean_t copysum, const uint16_t start, const uint16_t stuff,
95     const boolean_t invert)
96 {
97 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
98 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
99 	uint32_t partial;
100 	uint16_t csum = 0;
101 	uint8_t *sbaddr, *dbaddr;
102 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
103 
104 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
105 
106 	/* get buffer address from packet */
107 	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
108 	ASSERT(sbaddr != NULL);
109 	sbaddr += soff;
110 	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
111 	ASSERT(dbaddr != NULL);
112 	dbaddr += doff;
113 	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
114 
115 	switch (t) {
116 	case NR_RX:
117 		dpkt->pkt_csum_flags = 0;
118 		if (__probable(do_sum)) {
119 			/*
120 			 * Use pkt_copy() to copy the portion up to the
121 			 * point where we need to start the checksum, and
122 			 * copy the remainder, checksumming as we go.
123 			 */
124 			if (__probable(start != 0)) {
125 				_pkt_copy(sbaddr, dbaddr, start);
126 			}
127 			partial = __packet_copy_and_sum((sbaddr + start),
128 			    (dbaddr + start), (len - start), 0);
129 			csum = __packet_fold_sum(partial);
130 
131 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
132 			    start, csum, FALSE);
133 		} else {
134 			_pkt_copy(sbaddr, dbaddr, len);
135 			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
136 			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
137 			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
138 		}
139 
140 		SK_DF(SK_VERB_COPY | SK_VERB_RX,
141 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
142 		    sk_proc_name_address(current_proc()),
143 		    sk_proc_pid(current_proc()), len,
144 		    (copysum ? (len - start) : 0), csum, start);
145 		SK_DF(SK_VERB_COPY | SK_VERB_RX,
146 		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
147 		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
148 		    (uint32_t)dpkt->pkt_csum_rx_start_off,
149 		    (uint32_t)dpkt->pkt_csum_rx_value);
150 		break;
151 
152 	case NR_TX:
153 		if (copysum) {
154 			/*
155 			 * Use pkt_copy() to copy the portion up to the
156 			 * point where we need to start the checksum, and
157 			 * copy the remainder, checksumming as we go.
158 			 */
159 			if (__probable(start != 0)) {
160 				_pkt_copy(sbaddr, dbaddr, start);
161 			}
162 			partial = __packet_copy_and_sum((sbaddr + start),
163 			    (dbaddr + start), (len - start), 0);
164 			csum = __packet_fold_sum_final(partial);
165 
166 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
167 			if (csum == 0 && invert) {
168 				csum = 0xffff;
169 			}
170 
171 			/* Insert checksum into packet */
172 			ASSERT(stuff <= (len - sizeof(csum)));
173 			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
174 				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
175 			} else {
176 				bcopy((void *)&csum, dbaddr + stuff,
177 				    sizeof(csum));
178 			}
179 		} else {
180 			_pkt_copy(sbaddr, dbaddr, len);
181 		}
182 		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
183 		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
184 		dpkt->pkt_csum_tx_start_off = 0;
185 		dpkt->pkt_csum_tx_stuff_off = 0;
186 
187 		SK_DF(SK_VERB_COPY | SK_VERB_TX,
188 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
189 		    sk_proc_name_address(current_proc()),
190 		    sk_proc_pid(current_proc()), len,
191 		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
192 		break;
193 
194 	default:
195 		VERIFY(0);
196 		/* NOTREACHED */
197 		__builtin_unreachable();
198 	}
199 	METADATA_ADJUST_LEN(dpkt, len, doff);
200 
201 	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
202 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
203 	    (t == NR_RX) ? "RX" : "TX",
204 	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
205 }
206 
207 /*
208  * NOTE: soff is the offset within the packet
209  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
210  * caller is responsible for further reducing it to 16-bit if needed,
211  * as well as to perform the final 1's complement on it.
212  */
213 uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * __sized_by (len)dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)214 _pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
215     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
216 {
217 	uint8_t odd = 0;
218 	uint8_t *sbaddr = NULL;
219 	uint32_t sum = initial_sum, partial;
220 	uint32_t len0 = len;
221 	boolean_t needs_swap, started_on_odd = FALSE;
222 	uint16_t sbcnt, off0 = soff;
223 	uint32_t clen, sboff, sblen;
224 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
225 	kern_buflet_t sbuf = NULL, sbufp = NULL;
226 
227 	sbcnt = __packet_get_buflet_count(sph);
228 
229 	if (odd_start) {
230 		started_on_odd = *odd_start;
231 	}
232 
233 	/* fastpath (copy+sum, single buflet, even aligned, even length) */
234 	if (do_csum && sbcnt == 1 && len != 0) {
235 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
236 		ASSERT(sbuf != NULL);
237 		sboff = __buflet_get_data_offset(sbuf);
238 		sblen = __buflet_get_data_length(sbuf);
239 		ASSERT(sboff <= soff);
240 		ASSERT(soff < sboff + sblen);
241 		sblen -= (soff - sboff);
242 		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
243 
244 		clen = (uint16_t)MIN(len, sblen);
245 
246 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
247 			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
248 			return __packet_fold_sum(sum);
249 		}
250 
251 		sbaddr = NULL;
252 		sbuf = sbufp = NULL;
253 	}
254 
255 	while (len != 0) {
256 		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
257 		if (__improbable(sbuf == NULL)) {
258 			panic("%s: bad packet, 0x%llx [off %d, len %d]",
259 			    __func__, SK_KVA(spkt), off0, len0);
260 			/* NOTREACHED */
261 			__builtin_unreachable();
262 		}
263 		sbufp = sbuf;
264 		sboff = __buflet_get_data_offset(sbuf);
265 		sblen = __buflet_get_data_length(sbuf);
266 		ASSERT((sboff <= soff) && (soff < sboff + sblen));
267 		sblen -= (soff - sboff);
268 		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
269 		soff = 0;
270 		clen = (uint16_t)MIN(len, sblen);
271 		if (__probable(do_csum)) {
272 			partial = 0;
273 			if (__improbable((uintptr_t)sbaddr & 1)) {
274 				/* Align on word boundary */
275 				started_on_odd = !started_on_odd;
276 #if BYTE_ORDER == LITTLE_ENDIAN
277 				partial = (uint8_t)*sbaddr << 8;
278 #else /* BYTE_ORDER != LITTLE_ENDIAN */
279 				partial = (uint8_t)*sbaddr;
280 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
281 				/*
282 				 * -fbounds-safety: *dbaddr++ = *sbaddr++ fails
283 				 * to compile. But the following works. Also,
284 				 * grouping dbaddr and len updates led to higher
285 				 * throughput performance, compared to doing
286 				 * dbaddr++; sbaddr++; len -= 1; in that order.
287 				 */
288 				*dbaddr = *sbaddr;
289 				dbaddr++;
290 				sblen -= 1;
291 				clen -= 1;
292 				len -= 1;
293 				sbaddr++;
294 			}
295 			needs_swap = started_on_odd;
296 
297 			odd = clen & 1u;
298 			clen -= odd;
299 
300 			if (clen != 0) {
301 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
302 				    clen, partial);
303 			}
304 
305 			if (__improbable(partial & 0xc0000000)) {
306 				if (needs_swap) {
307 					partial = (partial << 8) +
308 					    (partial >> 24);
309 				}
310 				sum += (partial >> 16);
311 				sum += (partial & 0xffff);
312 				partial = 0;
313 			}
314 		} else {
315 			_pkt_copy(sbaddr, dbaddr, clen);
316 		}
317 
318 		dbaddr += clen;
319 
320 		/*
321 		 * -fbounds-safety: the following 3 lines were moved up from
322 		 * after the if-block. None of these are modified in the
323 		 * if-block, so moving these up here shouldn't change the
324 		 * behavior. Also, updating len before updating sbaddr led to
325 		 * faster throughput than doing: dbaddr += clen; sbaddr += clen;
326 		 * len -= clen + odd;
327 		 */
328 		sblen -= clen + odd;
329 		len -= clen + odd;
330 		ASSERT(sblen == 0 || len == 0);
331 
332 		sbaddr += clen;
333 
334 		if (__probable(do_csum)) {
335 			if (odd != 0) {
336 #if BYTE_ORDER == LITTLE_ENDIAN
337 				partial += (uint8_t)*sbaddr;
338 #else /* BYTE_ORDER != LITTLE_ENDIAN */
339 				partial += (uint8_t)*sbaddr << 8;
340 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
341 				*dbaddr++ = *sbaddr++;
342 				started_on_odd = !started_on_odd;
343 			}
344 
345 			if (needs_swap) {
346 				partial = (partial << 8) + (partial >> 24);
347 			}
348 			sum += (partial >> 16) + (partial & 0xffff);
349 			/*
350 			 * Reduce sum to allow potential byte swap
351 			 * in the next iteration without carry.
352 			 */
353 			sum = (sum >> 16) + (sum & 0xffff);
354 		}
355 	}
356 
357 	if (odd_start) {
358 		*odd_start = started_on_odd;
359 	}
360 
361 	if (__probable(do_csum)) {
362 		/* Final fold (reduce 32-bit to 16-bit) */
363 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
364 		sum = (sum >> 16) + (sum & 0xffff);
365 	}
366 	return sum;
367 }
368 
369 /*
370  * NOTE: Caller of this function is responsible to adjust the length and offset
371  * of the first buflet of the destination packet if (doff != 0),
372  * i.e. additional data is being prependend to the packet.
373  * It should also finalize the packet.
374  * To simplify & optimize the routine, we have also assumed that soff & doff
375  * will lie within the first buffer, which is true for the current use cases
376  * where, doff is the offset of the checksum field in the TCP/IP header and
377  * soff is the L3 offset.
378  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
379  * caller is responsible for further reducing it to 16-bit if needed,
380  * as well as to perform the final 1's complement on it.
381  */
382 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)383 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
384     uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
385 {
386 	uint8_t odd = 0;
387 	uint32_t sum = 0, partial;
388 	boolean_t needs_swap, started_on_odd = FALSE;
389 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
390 	uint16_t sbcnt, dbcnt;
391 	uint32_t clen, dlen0, sboff, sblen, dlim;
392 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
393 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
394 	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
395 
396 	ASSERT(csum_partial != NULL || !do_csum);
397 	sbcnt = __packet_get_buflet_count(sph);
398 	dbcnt = __packet_get_buflet_count(dph);
399 
400 	while (len != 0) {
401 		ASSERT(sbaddr == NULL || dbaddr == NULL);
402 		if (sbaddr == NULL) {
403 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
404 			if (__improbable(sbuf == NULL)) {
405 				break;
406 			}
407 			sbufp = sbuf;
408 			sblen = __buflet_get_data_length(sbuf);
409 			sboff = __buflet_get_data_offset(sbuf);
410 			ASSERT(soff >= sboff);
411 			ASSERT(sboff + sblen > soff);
412 			sblen -= (soff - sboff);
413 			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
414 			soff = 0;
415 		}
416 
417 		if (dbaddr == NULL) {
418 			if (dbufp != NULL) {
419 				__buflet_set_data_length(dbufp, dlen0);
420 			}
421 
422 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
423 			if (__improbable(dbuf == NULL)) {
424 				break;
425 			}
426 			dbufp = dbuf;
427 			dlim = __buflet_get_data_limit(dbuf);
428 			ASSERT(dlim > doff);
429 			dlim -= doff;
430 			if (doff != 0) {
431 				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
432 			}
433 			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
434 			dlen0 = dlim;
435 			doff = 0;
436 		}
437 
438 		clen = MIN(len, sblen);
439 		clen = MIN(clen, dlim);
440 
441 		if (__probable(do_csum)) {
442 			partial = 0;
443 			if (__improbable((uintptr_t)sbaddr & 1)) {
444 				/* Align on word boundary */
445 				started_on_odd = !started_on_odd;
446 #if BYTE_ORDER == LITTLE_ENDIAN
447 				partial = (uint8_t)*sbaddr << 8;
448 #else /* BYTE_ORDER != LITTLE_ENDIAN */
449 				partial = (uint8_t)*sbaddr;
450 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
451 				*dbaddr++ = *sbaddr++;
452 				clen -= 1;
453 				dlim -= 1;
454 				len -= 1;
455 			}
456 			needs_swap = started_on_odd;
457 
458 			odd = clen & 1u;
459 			clen -= odd;
460 
461 			if (clen != 0) {
462 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
463 				    clen, partial);
464 			}
465 
466 			if (__improbable(partial & 0xc0000000)) {
467 				if (needs_swap) {
468 					partial = (partial << 8) +
469 					    (partial >> 24);
470 				}
471 				sum += (partial >> 16);
472 				sum += (partial & 0xffff);
473 				partial = 0;
474 			}
475 		} else {
476 			_pkt_copy(sbaddr, dbaddr, clen);
477 		}
478 		sbaddr += clen;
479 		dbaddr += clen;
480 
481 		if (__probable(do_csum)) {
482 			if (odd != 0) {
483 #if BYTE_ORDER == LITTLE_ENDIAN
484 				partial += (uint8_t)*sbaddr;
485 #else /* BYTE_ORDER != LITTLE_ENDIAN */
486 				partial += (uint8_t)*sbaddr << 8;
487 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
488 				*dbaddr++ = *sbaddr++;
489 				started_on_odd = !started_on_odd;
490 			}
491 
492 			if (needs_swap) {
493 				partial = (partial << 8) + (partial >> 24);
494 			}
495 			sum += (partial >> 16) + (partial & 0xffff);
496 			/*
497 			 * Reduce sum to allow potential byte swap
498 			 * in the next iteration without carry.
499 			 */
500 			sum = (sum >> 16) + (sum & 0xffff);
501 		}
502 
503 		sblen -= clen + odd;
504 		dlim -= clen + odd;
505 		len -= clen + odd;
506 
507 		if (sblen == 0) {
508 			sbaddr = NULL;
509 		}
510 
511 		if (dlim == 0) {
512 			dbaddr = NULL;
513 		}
514 	}
515 
516 	if (__probable(dbuf != NULL)) {
517 		__buflet_set_data_length(dbuf, (dlen0 - dlim));
518 	}
519 	if (__probable(do_csum)) {
520 		/* Final fold (reduce 32-bit to 16-bit) */
521 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
522 		sum = (sum >> 16) + (sum & 0xffff);
523 		*csum_partial = (uint32_t)sum;
524 	}
525 	return len == 0;
526 }
527 
528 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)529 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
530 {
531 	uint8_t odd = 0;
532 	uint32_t sum = 0, partial;
533 	boolean_t needs_swap, started_on_odd = FALSE;
534 	uint8_t *sbaddr = NULL;
535 	uint16_t sbcnt;
536 	uint32_t clen, sblen, sboff;
537 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
538 	kern_buflet_t sbuf = NULL, sbufp = NULL;
539 
540 	sbcnt = __packet_get_buflet_count(sph);
541 
542 	/* fastpath (single buflet, even aligned, even length) */
543 	if (sbcnt == 1 && len != 0) {
544 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
545 		ASSERT(sbuf != NULL);
546 		sblen = __buflet_get_data_length(sbuf);
547 		sboff = __buflet_get_data_offset(sbuf);
548 		ASSERT(soff >= sboff);
549 		ASSERT(sboff + sblen > soff);
550 		sblen -= (soff - sboff);
551 		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
552 
553 		clen = MIN(len, sblen);
554 
555 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
556 			sum = __packet_cksum(sbaddr, clen, 0);
557 			return __packet_fold_sum(sum);
558 		}
559 
560 		sbaddr = NULL;
561 		sbuf = sbufp = NULL;
562 	}
563 
564 	/* slowpath */
565 	while (len != 0) {
566 		ASSERT(sbaddr == NULL);
567 		if (sbaddr == NULL) {
568 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
569 			if (__improbable(sbuf == NULL)) {
570 				break;
571 			}
572 			sbufp = sbuf;
573 			sblen = __buflet_get_data_length(sbuf);
574 			sboff = __buflet_get_data_offset(sbuf);
575 			ASSERT(soff >= sboff);
576 			ASSERT(sboff + sblen > soff);
577 			sblen -= (soff - sboff);
578 			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
579 			soff = 0;
580 		}
581 
582 		clen = MIN(len, sblen);
583 
584 		partial = 0;
585 		if (__improbable((uintptr_t)sbaddr & 1)) {
586 			/* Align on word boundary */
587 			started_on_odd = !started_on_odd;
588 #if BYTE_ORDER == LITTLE_ENDIAN
589 			partial = (uint8_t)*sbaddr << 8;
590 #else /* BYTE_ORDER != LITTLE_ENDIAN */
591 			partial = (uint8_t)*sbaddr;
592 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
593 			clen -= 1;
594 			len -= 1;
595 		}
596 		needs_swap = started_on_odd;
597 
598 		odd = clen & 1u;
599 		clen -= odd;
600 
601 		if (clen != 0) {
602 			partial = __packet_cksum(sbaddr,
603 			    clen, partial);
604 		}
605 
606 		if (__improbable(partial & 0xc0000000)) {
607 			if (needs_swap) {
608 				partial = (partial << 8) +
609 				    (partial >> 24);
610 			}
611 			sum += (partial >> 16);
612 			sum += (partial & 0xffff);
613 			partial = 0;
614 		}
615 		sbaddr += clen;
616 
617 		if (odd != 0) {
618 #if BYTE_ORDER == LITTLE_ENDIAN
619 			partial += (uint8_t)*sbaddr;
620 #else /* BYTE_ORDER != LITTLE_ENDIAN */
621 			partial += (uint8_t)*sbaddr << 8;
622 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
623 			started_on_odd = !started_on_odd;
624 		}
625 
626 		if (needs_swap) {
627 			partial = (partial << 8) + (partial >> 24);
628 		}
629 		sum += (partial >> 16) + (partial & 0xffff);
630 		/*
631 		 * Reduce sum to allow potential byte swap
632 		 * in the next iteration without carry.
633 		 */
634 		sum = (sum >> 16) + (sum & 0xffff);
635 
636 		sblen -= clen + odd;
637 		len -= clen + odd;
638 
639 		if (sblen == 0) {
640 			sbaddr = NULL;
641 		}
642 	}
643 
644 	/* Final fold (reduce 32-bit to 16-bit) */
645 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
646 	sum = (sum >> 16) + (sum & 0xffff);
647 	return (uint32_t)sum;
648 }
649 
650 
651 /*
652  * This is a multi-buflet variant of pkt_copy_from_pkt().
653  *
654  * start/stuff is relative to soff, within [0, len], such that
655  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
656  */
657 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)658 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
659     const uint16_t doff, kern_packet_t sph, const uint16_t soff,
660     const uint32_t len, const boolean_t copysum, const uint16_t start,
661     const uint16_t stuff, const boolean_t invert)
662 {
663 	boolean_t rc;
664 	uint32_t partial;
665 	uint16_t csum = 0;
666 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
667 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
668 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
669 
670 	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
671 	    __packet_get_buflet_count(dph)));
672 
673 	switch (t) {
674 	case NR_RX:
675 		dpkt->pkt_csum_flags = 0;
676 		if (__probable(do_sum)) {
677 			/*
678 			 * copy the portion up to the point where we need to
679 			 * start the checksum, and copy the remainder,
680 			 * checksumming as we go.
681 			 */
682 			if (__probable(start != 0)) {
683 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
684 				    start, NULL, FALSE);
685 				ASSERT(rc);
686 			}
687 			_pkt_copypkt_sum(sph, (soff + start), dph,
688 			    (doff + start), (len - start), &partial, TRUE);
689 			csum = __packet_fold_sum(partial);
690 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
691 			    start, csum, FALSE);
692 			METADATA_ADJUST_LEN(dpkt, start, doff);
693 		} else {
694 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
695 			    FALSE);
696 			ASSERT(rc);
697 			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
698 			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
699 			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
700 		}
701 		break;
702 
703 	case NR_TX:
704 		if (copysum) {
705 			uint8_t *baddr;
706 			/*
707 			 * copy the portion up to the point where we need to
708 			 * start the checksum, and copy the remainder,
709 			 * checksumming as we go.
710 			 */
711 			if (__probable(start != 0)) {
712 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
713 				    start, NULL, FALSE);
714 				ASSERT(rc);
715 			}
716 			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
717 			    (doff + start), (len - start), &partial, TRUE);
718 			ASSERT(rc);
719 			csum = __packet_fold_sum_final(partial);
720 
721 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
722 			if (csum == 0 && invert) {
723 				csum = 0xffff;
724 			}
725 
726 			/*
727 			 * Insert checksum into packet.
728 			 * Here we assume that checksum will be in the
729 			 * first buffer.
730 			 */
731 			ASSERT((stuff + doff + sizeof(csum)) <=
732 			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
733 			ASSERT(stuff <= (len - sizeof(csum)));
734 
735 			/* get first buflet buffer address from packet */
736 			MD_BUFLET_ADDR_ABS(dpkt, baddr);
737 			ASSERT(baddr != NULL);
738 			baddr += doff;
739 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
740 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
741 			} else {
742 				bcopy((void *)&csum, baddr + stuff,
743 				    sizeof(csum));
744 			}
745 			METADATA_ADJUST_LEN(dpkt, start, doff);
746 		} else {
747 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
748 			    FALSE);
749 			ASSERT(rc);
750 		}
751 		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
752 		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
753 		dpkt->pkt_csum_tx_start_off = 0;
754 		dpkt->pkt_csum_tx_stuff_off = 0;
755 
756 		SK_DF(SK_VERB_COPY | SK_VERB_TX,
757 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
758 		    sk_proc_name_address(current_proc()),
759 		    sk_proc_pid(current_proc()), len,
760 		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
761 		break;
762 
763 	default:
764 		VERIFY(0);
765 		/* NOTREACHED */
766 		__builtin_unreachable();
767 	}
768 }
769 
770 static inline uint32_t
_convert_mbuf_csum_flags(uint32_t mbuf_flags)771 _convert_mbuf_csum_flags(uint32_t mbuf_flags)
772 {
773 	uint32_t pkt_flags = 0;
774 
775 	if (mbuf_flags & CSUM_TCP) {
776 		pkt_flags |= PACKET_CSUM_TCP;
777 	}
778 	if (mbuf_flags & CSUM_TCPIPV6) {
779 		pkt_flags |= PACKET_CSUM_TCPIPV6;
780 	}
781 	if (mbuf_flags & CSUM_UDP) {
782 		pkt_flags |= PACKET_CSUM_UDP;
783 	}
784 	if (mbuf_flags & CSUM_UDPIPV6) {
785 		pkt_flags |= PACKET_CSUM_UDPIPV6;
786 	}
787 	if (mbuf_flags & CSUM_IP) {
788 		pkt_flags |= PACKET_CSUM_IP;
789 	}
790 	if (mbuf_flags & CSUM_ZERO_INVERT) {
791 		pkt_flags |= PACKET_CSUM_ZERO_INVERT;
792 	}
793 
794 	return pkt_flags;
795 }
796 
797 /*
798  * This routine is used for copying an mbuf which originated in the host
799  * stack destined to a native skywalk interface (NR_TX), as well as for
800  * mbufs originating on compat network interfaces (NR_RX).
801  *
802  * start/stuff is relative to moff, within [0, len], such that
803  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
804  */
805 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)806 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
807     struct mbuf *m, const uint16_t moff, const uint32_t len,
808     const boolean_t copysum, const uint16_t start)
809 {
810 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
811 	struct m_tag *ts_tag = NULL;
812 	uint32_t partial;
813 	uint16_t csum = 0;
814 	uint8_t *baddr;
815 
816 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
817 
818 	/* get buffer address from packet */
819 	MD_BUFLET_ADDR_ABS(pkt, baddr);
820 	ASSERT(baddr != NULL);
821 	baddr += poff;
822 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
823 
824 	switch (t) {
825 	case NR_RX:
826 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
827 		pkt->pkt_csum_rx_start_off = 0;
828 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
829 		pkt->pkt_svc_class = m_get_service_class(m);
830 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
831 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
832 			/*
833 			 * Use m_copydata() to copy the portion up to the
834 			 * point where we need to start the checksum, and
835 			 * copy the remainder, checksumming as we go.
836 			 */
837 			if (start != 0) {
838 				m_copydata(m, moff, start, baddr);
839 			}
840 			partial = m_copydata_sum(m, start, (len - start),
841 			    (baddr + start), 0, NULL);
842 			csum = __packet_fold_sum(partial);
843 
844 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
845 			    start, csum, FALSE);
846 		} else {
847 			m_copydata(m, moff, len, baddr);
848 		}
849 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
850 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
851 		    sk_proc_name_address(current_proc()),
852 		    sk_proc_pid(current_proc()), len,
853 		    (copysum ? (len - start) : 0), csum, start);
854 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
855 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
856 		    SK_KVA(m), m->m_pkthdr.csum_flags,
857 		    (uint32_t)m->m_pkthdr.csum_rx_start,
858 		    (uint32_t)m->m_pkthdr.csum_rx_val);
859 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
860 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
861 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
862 		    (uint32_t)pkt->pkt_csum_rx_start_off,
863 		    (uint32_t)pkt->pkt_csum_rx_value);
864 		break;
865 
866 	case NR_TX:
867 		if (copysum) {
868 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
869 			/*
870 			 * Use m_copydata() to copy the portion up to the
871 			 * point where we need to start the checksum, and
872 			 * copy the remainder, checksumming as we go.
873 			 */
874 			if (start != 0) {
875 				m_copydata(m, moff, start, baddr);
876 			}
877 			partial = m_copydata_sum(m, start, (len - start),
878 			    (baddr + start), 0, NULL);
879 			csum = __packet_fold_sum_final(partial);
880 
881 			/*
882 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
883 			 * ideally we'd only test for CSUM_ZERO_INVERT
884 			 * here, but catch cases where the originator
885 			 * did not set it for UDP.
886 			 */
887 			if (csum == 0 && (m->m_pkthdr.csum_flags &
888 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
889 				csum = 0xffff;
890 			}
891 
892 			/* Insert checksum into packet */
893 			ASSERT(stuff <= (len - sizeof(csum)));
894 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
895 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
896 			} else {
897 				bcopy((void *)&csum, baddr + stuff,
898 				    sizeof(csum));
899 			}
900 		} else {
901 			m_copydata(m, moff, len, baddr);
902 		}
903 		pkt->pkt_csum_flags = 0;
904 		pkt->pkt_csum_tx_start_off = 0;
905 		pkt->pkt_csum_tx_stuff_off = 0;
906 
907 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
908 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
909 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
910 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
911 		}
912 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
913 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
914 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
915 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
916 		}
917 		if (!copysum) {
918 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
919 		}
920 
921 		/* translate mbuf metadata */
922 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
923 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
924 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
925 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
926 		switch (m->m_pkthdr.pkt_proto) {
927 		case IPPROTO_QUIC:
928 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
929 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
930 			break;
931 
932 		default:
933 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
934 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
935 			break;
936 		}
937 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
938 		pkt->pkt_svc_class = m_get_service_class(m);
939 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
940 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
941 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
942 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
943 		}
944 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
945 			pkt->pkt_pflags |= PKT_F_L4S;
946 		}
947 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
948 		pkt->pkt_policy_id =
949 		    (uint32_t)necp_get_policy_id_from_packet(m);
950 		pkt->pkt_skip_policy_id =
951 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
952 
953 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
954 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
955 				__packet_set_tx_completion_data(ph,
956 				    m->m_pkthdr.drv_tx_compl_arg,
957 				    m->m_pkthdr.drv_tx_compl_data);
958 			}
959 			pkt->pkt_tx_compl_context =
960 			    m->m_pkthdr.pkt_compl_context;
961 			pkt->pkt_tx_compl_callbacks =
962 			    m->m_pkthdr.pkt_compl_callbacks;
963 			/*
964 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
965 			 * mbuf can no longer trigger a completion callback.
966 			 * callback will be invoked when the kernel packet is
967 			 * completed.
968 			 */
969 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
970 
971 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
972 		}
973 
974 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
975 		if (ts_tag != NULL) {
976 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
977 		}
978 
979 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
980 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
981 		    sk_proc_name_address(current_proc()),
982 		    sk_proc_pid(current_proc()), len,
983 		    (copysum ? (len - start) : 0), csum, start);
984 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
985 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
986 		    SK_KVA(m), m->m_pkthdr.csum_flags,
987 		    (uint32_t)m->m_pkthdr.csum_tx_start,
988 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
989 		break;
990 
991 	default:
992 		VERIFY(0);
993 		/* NOTREACHED */
994 		__builtin_unreachable();
995 	}
996 	METADATA_ADJUST_LEN(pkt, len, poff);
997 
998 	if (m->m_flags & M_BCAST) {
999 		__packet_set_link_broadcast(ph);
1000 	} else if (m->m_flags & M_MCAST) {
1001 		__packet_set_link_multicast(ph);
1002 	}
1003 
1004 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1005 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1006 	    (t == NR_RX) ? "RX" : "TX",
1007 	    sk_dump("buf", baddr, len, 128, NULL, 0));
1008 }
1009 
1010 /*
1011  * Like m_copydata_sum(), but works on a destination kernel packet.
1012  */
1013 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)1014 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1015     uint32_t len, boolean_t do_cscum)
1016 {
1017 	boolean_t needs_swap, started_on_odd = FALSE;
1018 	int off0 = soff;
1019 	uint32_t len0 = len;
1020 	struct mbuf *m0 = m;
1021 	uint32_t sum = 0, partial;
1022 	unsigned count0, count, odd, mlen_copied;
1023 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
1024 	uint16_t dbcnt = __packet_get_buflet_count(dph);
1025 	uint32_t dlim, dlen0;
1026 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1027 	kern_buflet_t dbuf = NULL, dbufp = NULL;
1028 
1029 	while (soff > 0) {
1030 		if (__improbable(m == NULL)) {
1031 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1032 			    __func__, m0, off0, len0);
1033 			/* NOTREACHED */
1034 			__builtin_unreachable();
1035 		}
1036 		if (soff < m->m_len) {
1037 			break;
1038 		}
1039 		soff -= m->m_len;
1040 		m = m->m_next;
1041 	}
1042 
1043 	if (__improbable(m == NULL)) {
1044 		panic("%s: invalid mbuf chain %p [off %d, len %d]",
1045 		    __func__, m0, off0, len0);
1046 		/* NOTREACHED */
1047 		__builtin_unreachable();
1048 	}
1049 
1050 	sbaddr = mtod(m, uint8_t *) + soff;
1051 	count = m->m_len - soff;
1052 	mlen_copied = 0;
1053 
1054 	while (len != 0) {
1055 		ASSERT(sbaddr == NULL || dbaddr == NULL);
1056 		if (sbaddr == NULL) {
1057 			soff = 0;
1058 			m = m->m_next;
1059 			if (__improbable(m == NULL)) {
1060 				panic("%s: invalid mbuf chain %p [off %d, "
1061 				    "len %d]", __func__, m0, off0, len0);
1062 				/* NOTREACHED */
1063 				__builtin_unreachable();
1064 			}
1065 			sbaddr = mtod(m, uint8_t *);
1066 			count = m->m_len;
1067 			mlen_copied = 0;
1068 		}
1069 
1070 		if (__improbable(count == 0)) {
1071 			sbaddr = NULL;
1072 			continue;
1073 		}
1074 
1075 		if (dbaddr == NULL) {
1076 			if (dbufp != NULL) {
1077 				__buflet_set_data_length(dbufp, dlen0);
1078 			}
1079 
1080 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1081 			if (__improbable(dbuf == NULL)) {
1082 				panic("%s: mbuf too large %p [off %d, "
1083 				    "len %d]", __func__, m0, off0, len0);
1084 				/* NOTREACHED */
1085 				__builtin_unreachable();
1086 			}
1087 			dbufp = dbuf;
1088 			dlim = __buflet_get_data_limit(dbuf) - doff;
1089 			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
1090 			dlen0 = dlim;
1091 			doff = 0;
1092 		}
1093 
1094 		count = MIN(count, (unsigned)len);
1095 		count0 = count = MIN(count, dlim);
1096 
1097 		if (!do_cscum) {
1098 			_pkt_copy(sbaddr, dbaddr, count);
1099 			sbaddr += count;
1100 			dbaddr += count;
1101 			goto skip_csum;
1102 		}
1103 
1104 		partial = 0;
1105 		if ((uintptr_t)sbaddr & 1) {
1106 			/* Align on word boundary */
1107 			started_on_odd = !started_on_odd;
1108 #if BYTE_ORDER == LITTLE_ENDIAN
1109 			partial = *sbaddr << 8;
1110 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1111 			partial = *sbaddr;
1112 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1113 			*dbaddr++ = *sbaddr++;
1114 			count -= 1;
1115 		}
1116 
1117 		needs_swap = started_on_odd;
1118 		odd = count & 1u;
1119 		count -= odd;
1120 
1121 		if (count) {
1122 			partial = __packet_copy_and_sum(sbaddr,
1123 			    dbaddr, count, partial);
1124 			sbaddr += count;
1125 			dbaddr += count;
1126 			if (__improbable(partial & 0xc0000000)) {
1127 				if (needs_swap) {
1128 					partial = (partial << 8) +
1129 					    (partial >> 24);
1130 				}
1131 				sum += (partial >> 16);
1132 				sum += (partial & 0xffff);
1133 				partial = 0;
1134 			}
1135 		}
1136 
1137 		if (odd) {
1138 #if BYTE_ORDER == LITTLE_ENDIAN
1139 			partial += *sbaddr;
1140 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1141 			partial += *sbaddr << 8;
1142 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1143 			*dbaddr++ = *sbaddr++;
1144 			started_on_odd = !started_on_odd;
1145 		}
1146 
1147 		if (needs_swap) {
1148 			partial = (partial << 8) + (partial >> 24);
1149 		}
1150 		sum += (partial >> 16) + (partial & 0xffff);
1151 		/*
1152 		 * Reduce sum to allow potential byte swap
1153 		 * in the next iteration without carry.
1154 		 */
1155 		sum = (sum >> 16) + (sum & 0xffff);
1156 
1157 skip_csum:
1158 		dlim -= count0;
1159 		len -= count0;
1160 		mlen_copied += count0;
1161 
1162 		if (dlim == 0) {
1163 			dbaddr = NULL;
1164 		}
1165 
1166 		count = m->m_len - soff - mlen_copied;
1167 		if (count == 0) {
1168 			sbaddr = NULL;
1169 		}
1170 	}
1171 
1172 	ASSERT(len == 0);
1173 	ASSERT(dbuf != NULL);
1174 	__buflet_set_data_length(dbuf, (dlen0 - dlim));
1175 
1176 	if (!do_cscum) {
1177 		return 0;
1178 	}
1179 
1180 	/* Final fold (reduce 32-bit to 16-bit) */
1181 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1182 	sum = (sum >> 16) + (sum & 0xffff);
1183 	return sum;
1184 }
1185 
1186 /*
1187  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1188  *
1189  * start/stuff is relative to moff, within [0, len], such that
1190  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1191  */
1192 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1193 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1194     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1195     const uint32_t len, const boolean_t copysum, const uint16_t start)
1196 {
1197 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1198 	struct m_tag *ts_tag = NULL;
1199 	uint32_t partial;
1200 	uint16_t csum = 0;
1201 	uint8_t *baddr;
1202 
1203 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1204 
1205 	/* get buffer address from packet */
1206 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1207 	ASSERT(baddr != NULL);
1208 	baddr += poff;
1209 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1210 	    __packet_get_buflet_count(ph)));
1211 
1212 	switch (t) {
1213 	case NR_RX:
1214 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1215 		pkt->pkt_csum_rx_start_off = 0;
1216 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1217 		pkt->pkt_svc_class = m_get_service_class(m);
1218 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1219 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
1220 			/*
1221 			 * Use m_copydata() to copy the portion up to the
1222 			 * point where we need to start the checksum, and
1223 			 * copy the remainder, checksumming as we go.
1224 			 */
1225 			if (start != 0) {
1226 				m_copydata(m, moff, start, baddr);
1227 			}
1228 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1229 			    (len - start), TRUE);
1230 			csum = __packet_fold_sum(partial);
1231 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1232 			    start, csum, FALSE);
1233 			METADATA_ADJUST_LEN(pkt, start, poff);
1234 		} else {
1235 			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1236 		}
1237 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1238 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1239 		    sk_proc_name_address(current_proc()),
1240 		    sk_proc_pid(current_proc()), len,
1241 		    (copysum ? (len - start) : 0), csum, start);
1242 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1243 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
1244 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1245 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1246 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1247 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1248 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1249 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1250 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1251 		    (uint32_t)pkt->pkt_csum_rx_value);
1252 		break;
1253 
1254 	case NR_TX:
1255 		if (copysum) {
1256 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1257 			/*
1258 			 * Use m_copydata() to copy the portion up to the
1259 			 * point where we need to start the checksum, and
1260 			 * copy the remainder, checksumming as we go.
1261 			 */
1262 			if (start != 0) {
1263 				m_copydata(m, moff, start, baddr);
1264 			}
1265 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1266 			    (len - start), TRUE);
1267 			csum = __packet_fold_sum_final(partial);
1268 
1269 			/*
1270 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1271 			 * ideally we'd only test for CSUM_ZERO_INVERT
1272 			 * here, but catch cases where the originator
1273 			 * did not set it for UDP.
1274 			 */
1275 			if (csum == 0 && (m->m_pkthdr.csum_flags &
1276 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1277 				csum = 0xffff;
1278 			}
1279 
1280 			/* Insert checksum into packet */
1281 			ASSERT(stuff <= (len - sizeof(csum)));
1282 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1283 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1284 			} else {
1285 				bcopy((void *)&csum, baddr + stuff,
1286 				    sizeof(csum));
1287 			}
1288 			METADATA_ADJUST_LEN(pkt, start, poff);
1289 		} else {
1290 			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1291 		}
1292 		pkt->pkt_csum_flags = 0;
1293 		pkt->pkt_csum_tx_start_off = 0;
1294 		pkt->pkt_csum_tx_stuff_off = 0;
1295 
1296 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1297 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
1298 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1299 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1300 		}
1301 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1302 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
1303 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1304 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1305 		}
1306 		if (!copysum) {
1307 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
1308 		}
1309 
1310 		/* translate mbuf metadata */
1311 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1312 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1313 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1314 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1315 		switch (m->m_pkthdr.pkt_proto) {
1316 		case IPPROTO_QUIC:
1317 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1318 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
1319 			break;
1320 
1321 		default:
1322 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1323 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1324 			break;
1325 		}
1326 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1327 		pkt->pkt_svc_class = m_get_service_class(m);
1328 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1329 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1330 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1331 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1332 		}
1333 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1334 			pkt->pkt_pflags |= PKT_F_L4S;
1335 		}
1336 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1337 		pkt->pkt_policy_id =
1338 		    (uint32_t)necp_get_policy_id_from_packet(m);
1339 		pkt->pkt_skip_policy_id =
1340 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
1341 
1342 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1343 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1344 				__packet_set_tx_completion_data(ph,
1345 				    m->m_pkthdr.drv_tx_compl_arg,
1346 				    m->m_pkthdr.drv_tx_compl_data);
1347 			}
1348 			pkt->pkt_tx_compl_context =
1349 			    m->m_pkthdr.pkt_compl_context;
1350 			pkt->pkt_tx_compl_callbacks =
1351 			    m->m_pkthdr.pkt_compl_callbacks;
1352 			/*
1353 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1354 			 * mbuf can no longer trigger a completion callback.
1355 			 * callback will be invoked when the kernel packet is
1356 			 * completed.
1357 			 */
1358 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1359 
1360 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1361 		}
1362 
1363 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
1364 		if (ts_tag != NULL) {
1365 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
1366 		}
1367 
1368 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1369 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1370 		    sk_proc_name_address(current_proc()),
1371 		    sk_proc_pid(current_proc()), len,
1372 		    (copysum ? (len - start) : 0), csum, start);
1373 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1374 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
1375 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1376 		    (uint32_t)m->m_pkthdr.csum_tx_start,
1377 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
1378 		break;
1379 
1380 	default:
1381 		VERIFY(0);
1382 		/* NOTREACHED */
1383 		__builtin_unreachable();
1384 	}
1385 
1386 	if (m->m_flags & M_BCAST) {
1387 		__packet_set_link_broadcast(ph);
1388 	} else if (m->m_flags & M_MCAST) {
1389 		__packet_set_link_multicast(ph);
1390 	}
1391 
1392 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1393 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1394 	    (t == NR_RX) ? "RX" : "TX",
1395 	    sk_dump("buf", baddr, len, 128, NULL, 0));
1396 }
1397 
1398 static inline uint32_t
_convert_pkt_csum_flags(uint32_t pkt_flags)1399 _convert_pkt_csum_flags(uint32_t pkt_flags)
1400 {
1401 	uint32_t mbuf_flags = 0;
1402 	if (pkt_flags & PACKET_CSUM_TCP) {
1403 		mbuf_flags |= CSUM_TCP;
1404 	}
1405 	if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1406 		mbuf_flags |= CSUM_TCPIPV6;
1407 	}
1408 	if (pkt_flags & PACKET_CSUM_UDP) {
1409 		mbuf_flags |= CSUM_UDP;
1410 	}
1411 	if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1412 		mbuf_flags |= CSUM_UDPIPV6;
1413 	}
1414 	if (pkt_flags & PACKET_CSUM_IP) {
1415 		mbuf_flags |= CSUM_IP;
1416 	}
1417 	if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1418 		mbuf_flags |= CSUM_ZERO_INVERT;
1419 	}
1420 
1421 	return mbuf_flags;
1422 }
1423 
1424 /*
1425  * This routine is used for copying from a packet originating from a native
1426  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1427  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1428  *
1429  * We do adjust the length to reflect the total data span.
1430  *
1431  * This routine supports copying into an mbuf chain for RX but not TX.
1432  *
1433  * start/stuff is relative to poff, within [0, len], such that
1434  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1435  */
1436 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1437 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1438     struct mbuf *m, const uint16_t moff, const uint32_t len,
1439     const boolean_t copysum, const uint16_t start)
1440 {
1441 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1442 	struct mbuf *curr_m;
1443 	uint32_t partial = 0;
1444 	uint32_t remaining_len = len, copied_len = 0;
1445 	uint16_t csum = 0;
1446 	uint8_t *baddr;
1447 	uint8_t *dp;
1448 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1449 
1450 	ASSERT(len >= start);
1451 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1452 
1453 	/* get buffer address from packet */
1454 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1455 	ASSERT(baddr != NULL);
1456 	baddr += poff;
1457 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1458 
1459 	ASSERT((m->m_flags & M_PKTHDR));
1460 	m->m_data += moff;
1461 
1462 	switch (t) {
1463 	case NR_RX:
1464 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1465 
1466 		/*
1467 		 * Use pkt_copy() to copy the portion up to the
1468 		 * point where we need to start the checksum, and
1469 		 * copy the remainder, checksumming as we go.
1470 		 */
1471 		if (__probable(do_sum && start != 0)) {
1472 			ASSERT(M_TRAILINGSPACE(m) >= start);
1473 			ASSERT(m->m_len == 0);
1474 			dp = (uint8_t *)m_mtod_current(m);
1475 			_pkt_copy(baddr, dp, start);
1476 			remaining_len -= start;
1477 			copied_len += start;
1478 			m->m_len += start;
1479 			m->m_pkthdr.len += start;
1480 		}
1481 		curr_m = m;
1482 		while (curr_m != NULL && remaining_len != 0) {
1483 			uint32_t tmp_len = MIN(remaining_len,
1484 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1485 			dp = (uint8_t *)m_mtod_end(curr_m);
1486 			if (__probable(do_sum)) {
1487 				partial = __packet_copy_and_sum((baddr + copied_len),
1488 				    dp, tmp_len, partial);
1489 			} else {
1490 				_pkt_copy((baddr + copied_len), dp, tmp_len);
1491 			}
1492 
1493 			curr_m->m_len += tmp_len;
1494 			m->m_pkthdr.len += tmp_len;
1495 			copied_len += tmp_len;
1496 			remaining_len -= tmp_len;
1497 			curr_m = curr_m->m_next;
1498 		}
1499 		ASSERT(remaining_len == 0);
1500 
1501 		if (__probable(do_sum)) {
1502 			csum = __packet_fold_sum(partial);
1503 
1504 			m->m_pkthdr.csum_flags |=
1505 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1506 			m->m_pkthdr.csum_rx_start = start;
1507 			m->m_pkthdr.csum_rx_val = csum;
1508 		} else {
1509 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1510 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1511 			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1512 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1513 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1514 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1515 			}
1516 		}
1517 
1518 		/* translate packet metadata */
1519 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1520 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1521 
1522 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1523 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1524 		    sk_proc_name_address(current_proc()),
1525 		    sk_proc_pid(current_proc()), len,
1526 		    (copysum ? (len - start) : 0), csum, start);
1527 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1528 		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1529 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1530 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1531 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1532 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1533 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1534 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1535 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1536 		    (uint32_t)pkt->pkt_csum_rx_value);
1537 		break;
1538 
1539 	case NR_TX:
1540 		dp = (uint8_t *)m_mtod_current(m);
1541 		ASSERT(m->m_next == NULL);
1542 
1543 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1544 		    (uint32_t)mbuf_maxlen(m));
1545 		m->m_len += len;
1546 		m->m_pkthdr.len += len;
1547 		VERIFY(m->m_len == m->m_pkthdr.len &&
1548 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1549 
1550 		if (copysum) {
1551 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1552 			/*
1553 			 * Use pkt_copy() to copy the portion up to the
1554 			 * point where we need to start the checksum, and
1555 			 * copy the remainder, checksumming as we go.
1556 			 */
1557 			if (__probable(start != 0)) {
1558 				_pkt_copy(baddr, dp, start);
1559 			}
1560 			partial = __packet_copy_and_sum((baddr + start),
1561 			    (dp + start), (len - start), 0);
1562 			csum = __packet_fold_sum_final(partial);
1563 
1564 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1565 			if (csum == 0 &&
1566 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1567 				csum = 0xffff;
1568 			}
1569 
1570 			/* Insert checksum into packet */
1571 			ASSERT(stuff <= (len - sizeof(csum)));
1572 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1573 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1574 			} else {
1575 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1576 			}
1577 		} else {
1578 			_pkt_copy(baddr, dp, len);
1579 		}
1580 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1581 		m->m_pkthdr.csum_tx_start = 0;
1582 		m->m_pkthdr.csum_tx_stuff = 0;
1583 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1584 
1585 		/* translate packet metadata */
1586 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1587 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1588 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1589 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1590 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1591 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1592 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1593 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1594 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1595 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1596 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1597 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1598 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1599 		}
1600 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1601 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1602 		}
1603 		if (__improbable(copy_pkt_tx_time != 0 &&
1604 		    (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1605 			struct m_tag *tag = NULL;
1606 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1607 			    sizeof(uint64_t), M_WAITOK, m);
1608 			if (tag != NULL) {
1609 				m_tag_prepend(m, tag);
1610 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1611 			}
1612 		}
1613 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1614 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1615 
1616 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1617 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1618 		    sk_proc_name_address(current_proc()),
1619 		    sk_proc_pid(current_proc()), len,
1620 		    (copysum ? (len - start) : 0), csum, start);
1621 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1622 		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1623 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1624 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1625 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1626 		break;
1627 
1628 	default:
1629 		VERIFY(0);
1630 		/* NOTREACHED */
1631 		__builtin_unreachable();
1632 	}
1633 
1634 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1635 		m->m_flags |= M_BCAST;
1636 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1637 		m->m_flags |= M_MCAST;
1638 	}
1639 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1640 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1641 	    (t == NR_RX) ? "RX" : "TX",
1642 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128, NULL, 0));
1643 }
1644 
1645 /*
1646  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1647  * NOTE: poff is the offset within the packet.
1648  *
1649  * This routine supports copying into an mbuf chain for RX but not TX.
1650  *
1651  * start/stuff is relative to poff, within [0, len], such that
1652  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1653  */
1654 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1655 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1656     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1657     const uint32_t len, const boolean_t copysum, const uint16_t start)
1658 {
1659 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1660 	struct mbuf *curr_m;
1661 	uint32_t partial = 0;
1662 	uint32_t remaining_len = len, copied_len = 0;
1663 	uint16_t csum = 0;
1664 	uint8_t *baddr;
1665 	uint8_t *dp;
1666 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1667 
1668 	ASSERT(len >= start);
1669 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1670 
1671 	/* get buffer address from packet */
1672 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1673 	ASSERT(baddr != NULL);
1674 	baddr += poff;
1675 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1676 	    __packet_get_buflet_count(ph)));
1677 
1678 	ASSERT((m->m_flags & M_PKTHDR));
1679 	m->m_data += moff;
1680 
1681 	switch (t) {
1682 	case NR_RX:
1683 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1684 		if (__probable(do_sum && start != 0)) {
1685 			ASSERT(M_TRAILINGSPACE(m) >= start);
1686 			ASSERT(m->m_len == 0);
1687 			dp = (uint8_t *)m_mtod_current(m);
1688 			_pkt_copy(baddr, dp, start);
1689 			remaining_len -= start;
1690 			copied_len += start;
1691 			m->m_len += start;
1692 			m->m_pkthdr.len += start;
1693 		}
1694 		curr_m = m;
1695 		while (curr_m != NULL && remaining_len != 0) {
1696 			uint32_t tmp_len = MIN(remaining_len,
1697 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1698 			uint16_t soff = poff + (uint16_t)copied_len;
1699 			dp = (uint8_t *)m_mtod_end(curr_m);
1700 
1701 			if (__probable(do_sum)) {
1702 				partial = _pkt_copyaddr_sum(ph, soff,
1703 				    dp, tmp_len, TRUE, partial, NULL);
1704 			} else {
1705 				pkt_copyaddr_sum(ph, soff,
1706 				    dp, tmp_len, FALSE, 0, NULL);
1707 			}
1708 
1709 			curr_m->m_len += tmp_len;
1710 			m->m_pkthdr.len += tmp_len;
1711 			copied_len += tmp_len;
1712 			remaining_len -= tmp_len;
1713 			curr_m = curr_m->m_next;
1714 		}
1715 		ASSERT(remaining_len == 0);
1716 
1717 		if (__probable(do_sum)) {
1718 			csum = __packet_fold_sum(partial);
1719 
1720 			m->m_pkthdr.csum_flags |=
1721 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1722 			m->m_pkthdr.csum_rx_start = start;
1723 			m->m_pkthdr.csum_rx_val = csum;
1724 		} else {
1725 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1726 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1727 			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1728 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1729 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1730 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1731 			}
1732 		}
1733 
1734 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1735 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1736 
1737 		/* translate packet metadata */
1738 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1739 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1740 
1741 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1742 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1743 		    sk_proc_name_address(current_proc()),
1744 		    sk_proc_pid(current_proc()), len,
1745 		    (copysum ? (len - start) : 0), csum, start);
1746 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1747 		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1748 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1749 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1750 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1751 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1752 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1753 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1754 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1755 		    (uint32_t)pkt->pkt_csum_rx_value);
1756 		break;
1757 	case NR_TX:
1758 		dp = (uint8_t *)m_mtod_current(m);
1759 		ASSERT(m->m_next == NULL);
1760 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1761 		    (uint32_t)mbuf_maxlen(m));
1762 		m->m_len += len;
1763 		m->m_pkthdr.len += len;
1764 		VERIFY(m->m_len == m->m_pkthdr.len &&
1765 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1766 		if (copysum) {
1767 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1768 			/*
1769 			 * Use pkt_copy() to copy the portion up to the
1770 			 * point where we need to start the checksum, and
1771 			 * copy the remainder, checksumming as we go.
1772 			 */
1773 			if (__probable(start != 0)) {
1774 				_pkt_copy(baddr, dp, start);
1775 			}
1776 			partial = _pkt_copyaddr_sum(ph, (poff + start),
1777 			    (dp + start), (len - start), TRUE, 0, NULL);
1778 			csum = __packet_fold_sum_final(partial);
1779 
1780 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1781 			if (csum == 0 &&
1782 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1783 				csum = 0xffff;
1784 			}
1785 
1786 			/* Insert checksum into packet */
1787 			ASSERT(stuff <= (len - sizeof(csum)));
1788 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1789 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1790 			} else {
1791 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1792 			}
1793 		} else {
1794 			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1795 		}
1796 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1797 		m->m_pkthdr.csum_tx_start = 0;
1798 		m->m_pkthdr.csum_tx_stuff = 0;
1799 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1800 
1801 		/* translate packet metadata */
1802 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1803 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1804 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1805 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1806 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1807 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1808 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1809 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1810 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1811 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1812 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1813 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1814 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1815 		}
1816 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1817 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1818 		}
1819 		if (__improbable(copy_pkt_tx_time != 0 &&
1820 		    (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1821 			struct m_tag *tag = NULL;
1822 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1823 			    sizeof(uint64_t), M_WAITOK, m);
1824 			if (tag != NULL) {
1825 				m_tag_prepend(m, tag);
1826 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1827 			}
1828 		}
1829 
1830 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1831 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1832 		    sk_proc_name_address(current_proc()),
1833 		    sk_proc_pid(current_proc()), len,
1834 		    (copysum ? (len - start) : 0), csum, start);
1835 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1836 		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1837 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1838 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1839 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1840 		break;
1841 
1842 	default:
1843 		VERIFY(0);
1844 		/* NOTREACHED */
1845 		__builtin_unreachable();
1846 	}
1847 
1848 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1849 		m->m_flags |= M_BCAST;
1850 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1851 		m->m_flags |= M_MCAST;
1852 	}
1853 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1854 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1855 	    (t == NR_RX) ? "RX" : "TX",
1856 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128, NULL, 0));
1857 }
1858 
/*
 * Like m_copydata(), but computes 16-bit sum as the data is copied.
 * Caller can provide an initial sum to be folded into the computed
 * sum.  The accumulated partial sum (32-bit) is returned to caller;
 * caller is responsible for further reducing it to 16-bit if needed,
 * as well as to perform the final 1's complement on it.
 *
 * Parameters:
 *   m            head of the mbuf chain to read from
 *   off          byte offset into the chain at which copying starts;
 *                mbufs lying entirely before it are skipped
 *   len          number of bytes to copy and sum
 *   vp           destination buffer, sized by len
 *   initial_sum  value the running sum is seeded with
 *   odd_start    optional (may be NULL); when supplied, *odd_start seeds
 *                the odd/even byte-position state on entry and receives
 *                the updated state on return
 *
 * Panics if off or len is negative, or if the chain runs out before the
 * offset has been consumed or before len bytes have been copied.
 */
uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len)vp, uint32_t initial_sum,
    boolean_t *odd_start)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	/*
	 * off0 and m0 keep the caller's values for the panic messages.
	 * len0 is the running count of bytes still to copy; len itself
	 * is never modified.
	 */
	int off0 = off, len0 = len;
	struct mbuf *m0 = m;
	uint64_t sum, partial;
	unsigned count, odd;
	char *cp = vp;

	if (__improbable(off < 0 || len < 0)) {
		panic("%s: invalid offset %d or len %d", __func__, off, len);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * Walk the chain to the mbuf containing the starting offset; on
	 * exit off is the remaining offset within that mbuf.
	 */
	while (off > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (off < m->m_len) {
			break;
		}
		off -= m->m_len;
		m = m->m_next;
	}

	if (odd_start) {
		started_on_odd = *odd_start;
	}
	sum = initial_sum;

	/* One iteration per mbuf until len0 bytes have been consumed. */
	for (; len0 > 0; m = m->m_next) {
		uint8_t *datap;

		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		datap = mtod(m, uint8_t *) + off;
		count = m->m_len;

		/* Empty mbuf: nothing to copy, move on to the next one. */
		if (__improbable(count == 0)) {
			continue;
		}

		/*
		 * Bytes to take from this mbuf: what follows the offset,
		 * capped by what is still wanted.
		 */
		count = MIN(count - off, (unsigned)len0);
		partial = 0;

		if ((uintptr_t)datap & 1) {
			/*
			 * Align on word boundary: copy a single byte so that
			 * datap becomes even, and flip the parity state.
			 */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *datap << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *datap;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			count -= 1;
			len0 -= 1;
		}

		/*
		 * Latch the parity for this mbuf's swap decision, then
		 * split off a trailing odd byte so the bulk copy below
		 * handles an even number of bytes.
		 */
		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			/*
			 * Bulk copy + sum of the even-length middle part; the
			 * current partial (truncated to 32 bits) is handed
			 * in as the starting sum.
			 */
			partial = __packet_copy_and_sum(datap,
			    cp, count, (uint32_t)partial);
			datap += count;
			cp += count;
			len0 -= count;
			/*
			 * If either of the two top bits of partial is set,
			 * flush it into sum now and restart from zero.
			 * NOTE(review): whether this can ever trigger depends
			 * on the return type of __packet_copy_and_sum(), which
			 * is not visible here -- confirm.
			 */
			if (__improbable((partial & (3ULL << 62)) != 0)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}

		if (odd) {
			/* Copy the trailing byte and flip the parity state. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *datap;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *datap << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			len0 -= 1;
			started_on_odd = !started_on_odd;
		}
		/* The starting offset only applies to the first mbuf used. */
		off = 0;

		/*
		 * NOTE(review): partial is 64 bits wide, so (partial << 8)
		 * already retains bits 24..31 (they land in bits 32..39 and
		 * are folded back in below); adding (partial >> 24) counts
		 * that byte a second time.  The result is therefore exact
		 * only while partial < 2^24 -- confirm that
		 * __packet_copy_and_sum() always returns a sum that small.
		 */
		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	/* Hand the final parity state back to the caller, if requested. */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	/* Final fold (reduce 64-bit to 32-bit) */
	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */

	/* return 32-bit partial sum to caller */
	return (uint32_t)sum;
}
1990 
1991 #if DEBUG || DEVELOPMENT
/*
 * State shared by pkt_add_trailers() and pkt_add_trailers_mbuf().
 * regen starts out equal to TRAILERS_REGEN, so the first
 * `regen++ == TRAILERS_REGEN' test performed by either function fills
 * tb[] with random bytes and restarts the counter from zero.
 */
#define TRAILERS_MAX    16              /* max trailing bytes */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1996 
1997 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1998 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1999 {
2000 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
2001 	uint32_t extra;
2002 	uint8_t *baddr;
2003 
2004 	/* get buffer address from packet */
2005 	MD_BUFLET_ADDR_ABS(pkt, baddr);
2006 	ASSERT(baddr != NULL);
2007 	ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
2008 
2009 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2010 	if (extra == 0 || extra > sizeof(tb) ||
2011 	    (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
2012 		return 0;
2013 	}
2014 
2015 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2016 	if (regen++ == TRAILERS_REGEN) {
2017 		read_frandom(&tb[0], sizeof(tb));
2018 		regen = 0;
2019 	}
2020 
2021 	bcopy(&tb[0], (baddr + len), extra);
2022 
2023 	/* recompute partial sum (also to exercise related logic) */
2024 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
2025 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
2026 	    ((len + extra) - start), 0);
2027 	pkt->pkt_csum_rx_start_off = start;
2028 
2029 	return extra;
2030 }
2031 
2032 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)2033 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
2034 {
2035 	uint32_t extra;
2036 
2037 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2038 	if (extra == 0 || extra > sizeof(tb)) {
2039 		return 0;
2040 	}
2041 
2042 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2043 		return 0;
2044 	}
2045 
2046 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2047 	if (regen++ == TRAILERS_REGEN) {
2048 		read_frandom(&tb[0], sizeof(tb));
2049 		regen = 0;
2050 	}
2051 
2052 	/* recompute partial sum (also to exercise related logic) */
2053 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2054 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2055 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2056 	m->m_pkthdr.csum_rx_start = start;
2057 
2058 	return extra;
2059 }
2060 #endif /* DEBUG || DEVELOPMENT */
2061 
/*
 * Exported wrapper around _pkt_copypkt_sum(): forwards every argument
 * unchanged and VERIFYs that the call reports success, so a failed copy
 * is fatal here instead of being reported to the caller.
 *
 * NOTE(review): parameter meanings are those of _pkt_copypkt_sum(), which
 * is defined outside this excerpt -- presumably source packet/offset,
 * destination packet/offset, byte count, checksum accumulator and a flag
 * enabling checksumming; confirm against its definition.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
2068 
2069 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * __sized_by (len)dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)2070 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
2071     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
2072 {
2073 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
2074 }
2075 
2076 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)2077 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2078     uint16_t len, boolean_t do_cscum)
2079 {
2080 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2081 }
2082 
2083 void
pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)2084 pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
2085 {
2086 	return _pkt_copy(src, dst, len);
2087 }
2088