/* xref: /xnu-11417.140.69/bsd/skywalk/packet/packet_copy.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4) */
1 /*
2  * Copyright (c) 2017-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
/*
 * When non-zero, the packet's TX time is propagated to the mbuf during
 * copy (per the sysctl description below); tunable only on
 * DEVELOPMENT/DEBUG kernels, fixed at 1 otherwise.
 */
uint32_t copy_pkt_tx_time = 1;
#if (DEVELOPMENT || DEBUG)
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");

SYSCTL_UINT(_kern_skywalk_packet, OID_AUTO, copy_pkt_tx_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &copy_pkt_tx_time, 0,
    "copy tx time from pkt to mbuf");
#endif /* !DEVELOPMENT && !DEBUG */
45 
46 
47 __attribute__((always_inline))
48 static inline void
_pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)49 _pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
50 {
51 	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
52 		switch (len) {
53 		case 20:        /* standard IPv4 header */
54 			sk_copy64_20(src, dst);
55 			return;
56 
57 		case 40:        /* IPv6 header */
58 			sk_copy64_40(src, dst);
59 			return;
60 
61 		default:
62 			if (IS_P2ALIGNED(len, 64)) {
63 				sk_copy64_64x(src, dst, len);
64 				return;
65 			} else if (IS_P2ALIGNED(len, 32)) {
66 				sk_copy64_32x(src, dst, len);
67 				return;
68 			} else if (IS_P2ALIGNED(len, 8)) {
69 				sk_copy64_8x(src, dst, len);
70 				return;
71 			} else if (IS_P2ALIGNED(len, 4)) {
72 				sk_copy64_4x(src, dst, len);
73 				return;
74 			}
75 			break;
76 		}
77 	}
78 	bcopy(src, dst, len);
79 }
80 
/*
 * This routine is used for copying data across two kernel packets.
 * Can also optionally compute 16-bit partial inet checksum as the
 * data is copied.
 * This routine is used by flowswitch while copying packet from vp
 * adapter pool to packet in native netif pool and vice-a-versa.
 *
 * start/stuff is relative to soff, within [0, len], such that
 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
 *
 * t:        direction (NR_RX/NR_TX); selects how checksum metadata is
 *           recorded on the destination packet.
 * dph/doff: destination packet handle, offset into its first buflet.
 * sph/soff: source packet handle, offset into its first buflet.
 * len:      bytes to copy starting at soff (source) / doff (dest).
 * copysum:  if TRUE, compute an inet checksum while copying.
 * start:    offset relative to soff where checksumming begins.
 * stuff:    NR_TX only - offset of the checksum field to stuff.
 * invert:   NR_TX only - RFC1122 4.1.3.4 zero-checksum inversion.
 */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;
	/* skip summing when the source already carries a full checksum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet routine: copy must fit in the default buffer */
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* record the folded 16-bit sum as a partial csum */
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* plain copy; propagate source RX csum metadata */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (copysum) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* final fold includes the 1's complement */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			/* direct 16-bit store only when suitably aligned */
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* keep only TSO/offload flags; csum was handled above */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
206 
/*
 * Copy len bytes starting at soff from packet sph into the flat buffer
 * dbaddr, walking the source packet's buflet chain; optionally compute
 * the inet checksum (RFC 1071 style, with odd-byte swap tracking) while
 * copying.
 *
 * NOTE: soff is the offset within the packet
 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
 * caller is responsible for further reducing it to 16-bit if needed,
 * as well as to perform the final 1's complement on it.
 *
 * odd_start, if non-NULL, carries the odd/even byte phase in and out so
 * sums can be chained across calls.
 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;             /* original len, for the panic msg */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t sbcnt, off0 = soff;     /* original soff, for the panic msg */
	uint32_t clen, sboff, sblen;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;

		/*
		 * NOTE(review): clen may be < len if the single buflet does
		 * not cover the full extent; the fastpath then copies/sums
		 * only clen bytes — presumably callers guarantee the buflet
		 * covers len here.  TODO: confirm.  Also note the fastpath
		 * returns a folded sum and does not update *odd_start.
		 */
		clen = (uint16_t)MIN(len, sblen);

		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	/* slowpath: walk the buflet chain one buflet per iteration */
	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
		soff = 0;       /* soff applies to the first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				/*
				 * -fbounds-safety: *dbaddr++ = *sbaddr++ fails
				 * to compile. But the following works. Also,
				 * grouping dbaddr and len updates led to higher
				 * throughput performance, compared to doing
				 * dbaddr++; sbaddr++; len -= 1; in that order.
				 */
				*dbaddr = *sbaddr;
				dbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
				sbaddr++;
			}
			/* a sum started mid-word must be byte-swapped back */
			needs_swap = started_on_odd;

			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* reduce early so a later swap cannot overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;

		/*
		 * -fbounds-safety: the following 3 lines were moved up from
		 * after the if-block. None of these are modified in the
		 * if-block, so moving these up here shouldn't change the
		 * behavior. Also, updating len before updating sbaddr led to
		 * faster throughput than doing: dbaddr += clen; sbaddr += clen;
		 * len -= clen + odd;
		 */
		sblen -= clen + odd;
		len -= clen + odd;
		ASSERT(sblen == 0 || len == 0);

		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the dangling odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}
	}

	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
368 
369 /*
370  * NOTE: Caller of this function is responsible to adjust the length and offset
371  * of the first buflet of the destination packet if (doff != 0),
372  * i.e. additional data is being prependend to the packet.
373  * It should also finalize the packet.
374  * To simplify & optimize the routine, we have also assumed that soff & doff
375  * will lie within the first buffer, which is true for the current use cases
376  * where, doff is the offset of the checksum field in the TCP/IP header and
377  * soff is the L3 offset.
378  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
379  * caller is responsible for further reducing it to 16-bit if needed,
380  * as well as to perform the final 1's complement on it.
381  */
382 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)383 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
384     uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
385 {
386 	uint8_t odd = 0;
387 	uint32_t sum = 0, partial;
388 	boolean_t needs_swap, started_on_odd = FALSE;
389 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
390 	uint16_t sbcnt, dbcnt;
391 	uint32_t clen, dlen0, sboff, sblen, dlim;
392 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
393 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
394 	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
395 
396 	ASSERT(csum_partial != NULL || !do_csum);
397 	sbcnt = __packet_get_buflet_count(sph);
398 	dbcnt = __packet_get_buflet_count(dph);
399 
400 	while (len != 0) {
401 		ASSERT(sbaddr == NULL || dbaddr == NULL);
402 		if (sbaddr == NULL) {
403 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
404 			if (__improbable(sbuf == NULL)) {
405 				break;
406 			}
407 			sbufp = sbuf;
408 			sblen = __buflet_get_data_length(sbuf);
409 			sboff = __buflet_get_data_offset(sbuf);
410 			ASSERT(soff >= sboff);
411 			ASSERT(sboff + sblen > soff);
412 			sblen -= (soff - sboff);
413 			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
414 			soff = 0;
415 		}
416 
417 		if (dbaddr == NULL) {
418 			if (dbufp != NULL) {
419 				__buflet_set_data_length(dbufp, dlen0);
420 			}
421 
422 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
423 			if (__improbable(dbuf == NULL)) {
424 				break;
425 			}
426 			dbufp = dbuf;
427 			dlim = __buflet_get_data_limit(dbuf);
428 			ASSERT(dlim > doff);
429 			dlim -= doff;
430 			if (doff != 0) {
431 				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
432 			}
433 			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
434 			dlen0 = dlim;
435 			doff = 0;
436 		}
437 
438 		clen = MIN(len, sblen);
439 		clen = MIN(clen, dlim);
440 
441 		if (__probable(do_csum)) {
442 			partial = 0;
443 			if (__improbable((uintptr_t)sbaddr & 1)) {
444 				/* Align on word boundary */
445 				started_on_odd = !started_on_odd;
446 #if BYTE_ORDER == LITTLE_ENDIAN
447 				partial = (uint8_t)*sbaddr << 8;
448 #else /* BYTE_ORDER != LITTLE_ENDIAN */
449 				partial = (uint8_t)*sbaddr;
450 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
451 				*dbaddr++ = *sbaddr++;
452 				clen -= 1;
453 				dlim -= 1;
454 				len -= 1;
455 			}
456 			needs_swap = started_on_odd;
457 
458 			odd = clen & 1u;
459 			clen -= odd;
460 
461 			if (clen != 0) {
462 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
463 				    clen, partial);
464 			}
465 
466 			if (__improbable(partial & 0xc0000000)) {
467 				if (needs_swap) {
468 					partial = (partial << 8) +
469 					    (partial >> 24);
470 				}
471 				sum += (partial >> 16);
472 				sum += (partial & 0xffff);
473 				partial = 0;
474 			}
475 		} else {
476 			_pkt_copy(sbaddr, dbaddr, clen);
477 		}
478 		sbaddr += clen;
479 		dbaddr += clen;
480 
481 		if (__probable(do_csum)) {
482 			if (odd != 0) {
483 #if BYTE_ORDER == LITTLE_ENDIAN
484 				partial += (uint8_t)*sbaddr;
485 #else /* BYTE_ORDER != LITTLE_ENDIAN */
486 				partial += (uint8_t)*sbaddr << 8;
487 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
488 				*dbaddr++ = *sbaddr++;
489 				started_on_odd = !started_on_odd;
490 			}
491 
492 			if (needs_swap) {
493 				partial = (partial << 8) + (partial >> 24);
494 			}
495 			sum += (partial >> 16) + (partial & 0xffff);
496 			/*
497 			 * Reduce sum to allow potential byte swap
498 			 * in the next iteration without carry.
499 			 */
500 			sum = (sum >> 16) + (sum & 0xffff);
501 		}
502 
503 		sblen -= clen + odd;
504 		dlim -= clen + odd;
505 		len -= clen + odd;
506 
507 		if (sblen == 0) {
508 			sbaddr = NULL;
509 		}
510 
511 		if (dlim == 0) {
512 			dbaddr = NULL;
513 		}
514 	}
515 
516 	if (__probable(dbuf != NULL)) {
517 		__buflet_set_data_length(dbuf, (dlen0 - dlim));
518 	}
519 	if (__probable(do_csum)) {
520 		/* Final fold (reduce 32-bit to 16-bit) */
521 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
522 		sum = (sum >> 16) + (sum & 0xffff);
523 		*csum_partial = (uint32_t)sum;
524 	}
525 	return len == 0;
526 }
527 
528 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)529 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
530 {
531 	uint8_t odd = 0;
532 	uint32_t sum = 0, partial;
533 	boolean_t needs_swap, started_on_odd = FALSE;
534 	uint8_t *sbaddr = NULL;
535 	uint16_t sbcnt;
536 	uint32_t clen, sblen, sboff;
537 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
538 	kern_buflet_t sbuf = NULL, sbufp = NULL;
539 
540 	sbcnt = __packet_get_buflet_count(sph);
541 
542 	/* fastpath (single buflet, even aligned, even length) */
543 	if (sbcnt == 1 && len != 0) {
544 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
545 		ASSERT(sbuf != NULL);
546 		sblen = __buflet_get_data_length(sbuf);
547 		sboff = __buflet_get_data_offset(sbuf);
548 		ASSERT(soff >= sboff);
549 		ASSERT(sboff + sblen > soff);
550 		sblen -= (soff - sboff);
551 		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
552 
553 		clen = MIN(len, sblen);
554 
555 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
556 			sum = __packet_cksum(sbaddr, clen, 0);
557 			return __packet_fold_sum(sum);
558 		}
559 
560 		sbaddr = NULL;
561 		sbuf = sbufp = NULL;
562 	}
563 
564 	/* slowpath */
565 	while (len != 0) {
566 		ASSERT(sbaddr == NULL);
567 		if (sbaddr == NULL) {
568 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
569 			if (__improbable(sbuf == NULL)) {
570 				break;
571 			}
572 			sbufp = sbuf;
573 			sblen = __buflet_get_data_length(sbuf);
574 			sboff = __buflet_get_data_offset(sbuf);
575 			ASSERT(soff >= sboff);
576 			ASSERT(sboff + sblen > soff);
577 			sblen -= (soff - sboff);
578 			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
579 			soff = 0;
580 		}
581 
582 		clen = MIN(len, sblen);
583 
584 		partial = 0;
585 		if (__improbable((uintptr_t)sbaddr & 1)) {
586 			/* Align on word boundary */
587 			started_on_odd = !started_on_odd;
588 #if BYTE_ORDER == LITTLE_ENDIAN
589 			partial = (uint8_t)*sbaddr << 8;
590 #else /* BYTE_ORDER != LITTLE_ENDIAN */
591 			partial = (uint8_t)*sbaddr;
592 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
593 			clen -= 1;
594 			len -= 1;
595 		}
596 		needs_swap = started_on_odd;
597 
598 		odd = clen & 1u;
599 		clen -= odd;
600 
601 		if (clen != 0) {
602 			partial = __packet_cksum(sbaddr,
603 			    clen, partial);
604 		}
605 
606 		if (__improbable(partial & 0xc0000000)) {
607 			if (needs_swap) {
608 				partial = (partial << 8) +
609 				    (partial >> 24);
610 			}
611 			sum += (partial >> 16);
612 			sum += (partial & 0xffff);
613 			partial = 0;
614 		}
615 		sbaddr += clen;
616 
617 		if (odd != 0) {
618 #if BYTE_ORDER == LITTLE_ENDIAN
619 			partial += (uint8_t)*sbaddr;
620 #else /* BYTE_ORDER != LITTLE_ENDIAN */
621 			partial += (uint8_t)*sbaddr << 8;
622 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
623 			started_on_odd = !started_on_odd;
624 		}
625 
626 		if (needs_swap) {
627 			partial = (partial << 8) + (partial >> 24);
628 		}
629 		sum += (partial >> 16) + (partial & 0xffff);
630 		/*
631 		 * Reduce sum to allow potential byte swap
632 		 * in the next iteration without carry.
633 		 */
634 		sum = (sum >> 16) + (sum & 0xffff);
635 
636 		sblen -= clen + odd;
637 		len -= clen + odd;
638 
639 		if (sblen == 0) {
640 			sbaddr = NULL;
641 		}
642 	}
643 
644 	/* Final fold (reduce 32-bit to 16-bit) */
645 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
646 	sum = (sum >> 16) + (sum & 0xffff);
647 	return (uint32_t)sum;
648 }
649 
650 
651 /*
652  * This is a multi-buflet variant of pkt_copy_from_pkt().
653  *
654  * start/stuff is relative to soff, within [0, len], such that
655  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
656  */
657 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)658 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
659     const uint16_t doff, kern_packet_t sph, const uint16_t soff,
660     const uint32_t len, const boolean_t copysum, const uint16_t start,
661     const uint16_t stuff, const boolean_t invert)
662 {
663 	boolean_t rc;
664 	uint32_t partial;
665 	uint16_t csum = 0;
666 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
667 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
668 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
669 
670 	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
671 	    __packet_get_buflet_count(dph)));
672 
673 	switch (t) {
674 	case NR_RX:
675 		dpkt->pkt_csum_flags = 0;
676 		if (__probable(do_sum)) {
677 			/*
678 			 * copy the portion up to the point where we need to
679 			 * start the checksum, and copy the remainder,
680 			 * checksumming as we go.
681 			 */
682 			if (__probable(start != 0)) {
683 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
684 				    start, NULL, FALSE);
685 				ASSERT(rc);
686 			}
687 			_pkt_copypkt_sum(sph, (soff + start), dph,
688 			    (doff + start), (len - start), &partial, TRUE);
689 			csum = __packet_fold_sum(partial);
690 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
691 			    start, csum, FALSE);
692 			METADATA_ADJUST_LEN(dpkt, start, doff);
693 		} else {
694 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
695 			    FALSE);
696 			ASSERT(rc);
697 			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
698 			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
699 			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
700 		}
701 		break;
702 
703 	case NR_TX:
704 		if (copysum) {
705 			uint8_t *baddr;
706 			/*
707 			 * copy the portion up to the point where we need to
708 			 * start the checksum, and copy the remainder,
709 			 * checksumming as we go.
710 			 */
711 			if (__probable(start != 0)) {
712 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
713 				    start, NULL, FALSE);
714 				ASSERT(rc);
715 			}
716 			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
717 			    (doff + start), (len - start), &partial, TRUE);
718 			ASSERT(rc);
719 			csum = __packet_fold_sum_final(partial);
720 
721 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
722 			if (csum == 0 && invert) {
723 				csum = 0xffff;
724 			}
725 
726 			/*
727 			 * Insert checksum into packet.
728 			 * Here we assume that checksum will be in the
729 			 * first buffer.
730 			 */
731 			ASSERT((stuff + doff + sizeof(csum)) <=
732 			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
733 			ASSERT(stuff <= (len - sizeof(csum)));
734 
735 			/* get first buflet buffer address from packet */
736 			MD_BUFLET_ADDR_ABS(dpkt, baddr);
737 			ASSERT(baddr != NULL);
738 			baddr += doff;
739 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
740 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
741 			} else {
742 				bcopy((void *)&csum, baddr + stuff,
743 				    sizeof(csum));
744 			}
745 			METADATA_ADJUST_LEN(dpkt, start, doff);
746 		} else {
747 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
748 			    FALSE);
749 			ASSERT(rc);
750 		}
751 		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
752 		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
753 		dpkt->pkt_csum_tx_start_off = 0;
754 		dpkt->pkt_csum_tx_stuff_off = 0;
755 
756 		SK_DF(SK_VERB_COPY | SK_VERB_TX,
757 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
758 		    sk_proc_name_address(current_proc()),
759 		    sk_proc_pid(current_proc()), len,
760 		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
761 		break;
762 
763 	default:
764 		VERIFY(0);
765 		/* NOTREACHED */
766 		__builtin_unreachable();
767 	}
768 }
769 
770 static inline uint32_t
_convert_mbuf_csum_flags(uint32_t mbuf_flags)771 _convert_mbuf_csum_flags(uint32_t mbuf_flags)
772 {
773 	uint32_t pkt_flags = 0;
774 
775 	if (mbuf_flags & CSUM_TCP) {
776 		pkt_flags |= PACKET_CSUM_TCP;
777 	}
778 	if (mbuf_flags & CSUM_TCPIPV6) {
779 		pkt_flags |= PACKET_CSUM_TCPIPV6;
780 	}
781 	if (mbuf_flags & CSUM_UDP) {
782 		pkt_flags |= PACKET_CSUM_UDP;
783 	}
784 	if (mbuf_flags & CSUM_UDPIPV6) {
785 		pkt_flags |= PACKET_CSUM_UDPIPV6;
786 	}
787 	if (mbuf_flags & CSUM_IP) {
788 		pkt_flags |= PACKET_CSUM_IP;
789 	}
790 	if (mbuf_flags & CSUM_ZERO_INVERT) {
791 		pkt_flags |= PACKET_CSUM_ZERO_INVERT;
792 	}
793 
794 	return pkt_flags;
795 }
796 
797 /*
798  * This routine is used for copying an mbuf which originated in the host
799  * stack destined to a native skywalk interface (NR_TX), as well as for
800  * mbufs originating on compat network interfaces (NR_RX).
801  *
802  * start/stuff is relative to moff, within [0, len], such that
803  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
804  */
805 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)806 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
807     struct mbuf *m, const uint16_t moff, const uint32_t len,
808     const boolean_t copysum, const uint16_t start)
809 {
810 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
811 	struct m_tag *ts_tag = NULL;
812 	uint32_t partial;
813 	uint16_t csum = 0;
814 	uint8_t *baddr;
815 
816 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
817 
818 	/* get buffer address from packet */
819 	MD_BUFLET_ADDR_ABS(pkt, baddr);
820 	ASSERT(baddr != NULL);
821 	baddr += poff;
822 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
823 
824 	switch (t) {
825 	case NR_RX:
826 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
827 		pkt->pkt_csum_rx_start_off = 0;
828 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
829 		pkt->pkt_svc_class = m_get_service_class(m);
830 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
831 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
832 			/*
833 			 * Use m_copydata() to copy the portion up to the
834 			 * point where we need to start the checksum, and
835 			 * copy the remainder, checksumming as we go.
836 			 */
837 			if (start != 0) {
838 				m_copydata(m, moff, start, baddr);
839 			}
840 			partial = m_copydata_sum(m, start, (len - start),
841 			    (baddr + start), 0, NULL);
842 			csum = __packet_fold_sum(partial);
843 
844 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
845 			    start, csum, FALSE);
846 		} else {
847 			m_copydata(m, moff, len, baddr);
848 		}
849 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
850 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
851 		    sk_proc_name_address(current_proc()),
852 		    sk_proc_pid(current_proc()), len,
853 		    (copysum ? (len - start) : 0), csum, start);
854 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
855 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
856 		    SK_KVA(m), m->m_pkthdr.csum_flags,
857 		    (uint32_t)m->m_pkthdr.csum_rx_start,
858 		    (uint32_t)m->m_pkthdr.csum_rx_val);
859 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
860 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
861 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
862 		    (uint32_t)pkt->pkt_csum_rx_start_off,
863 		    (uint32_t)pkt->pkt_csum_rx_value);
864 		break;
865 
866 	case NR_TX:
867 		if (copysum) {
868 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
869 			/*
870 			 * Use m_copydata() to copy the portion up to the
871 			 * point where we need to start the checksum, and
872 			 * copy the remainder, checksumming as we go.
873 			 */
874 			if (start != 0) {
875 				m_copydata(m, moff, start, baddr);
876 			}
877 			partial = m_copydata_sum(m, start, (len - start),
878 			    (baddr + start), 0, NULL);
879 			csum = __packet_fold_sum_final(partial);
880 
881 			/*
882 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
883 			 * ideally we'd only test for CSUM_ZERO_INVERT
884 			 * here, but catch cases where the originator
885 			 * did not set it for UDP.
886 			 */
887 			if (csum == 0 && (m->m_pkthdr.csum_flags &
888 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
889 				csum = 0xffff;
890 			}
891 
892 			/* Insert checksum into packet */
893 			ASSERT(stuff <= (len - sizeof(csum)));
894 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
895 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
896 			} else {
897 				bcopy((void *)&csum, baddr + stuff,
898 				    sizeof(csum));
899 			}
900 		} else {
901 			m_copydata(m, moff, len, baddr);
902 		}
903 		pkt->pkt_csum_flags = 0;
904 		pkt->pkt_csum_tx_start_off = 0;
905 		pkt->pkt_csum_tx_stuff_off = 0;
906 
907 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
908 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
909 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
910 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
911 		}
912 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
913 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
914 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
915 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
916 		}
917 		if (!copysum) {
918 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
919 		}
920 
921 		/* translate mbuf metadata */
922 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
923 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
924 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
925 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
926 		switch (m->m_pkthdr.pkt_proto) {
927 		case IPPROTO_QUIC:
928 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
929 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
930 			break;
931 
932 		default:
933 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
934 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
935 			break;
936 		}
937 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
938 		pkt->pkt_svc_class = m_get_service_class(m);
939 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
940 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
941 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
942 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
943 		}
944 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
945 			pkt->pkt_pflags |= PKT_F_L4S;
946 		}
947 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
948 		pkt->pkt_policy_id =
949 		    (uint32_t)necp_get_policy_id_from_packet(m);
950 		pkt->pkt_skip_policy_id =
951 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
952 
953 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
954 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
955 				__packet_set_tx_completion_data(ph,
956 				    m->m_pkthdr.drv_tx_compl_arg,
957 				    m->m_pkthdr.drv_tx_compl_data);
958 			}
959 			pkt->pkt_tx_compl_context =
960 			    m->m_pkthdr.pkt_compl_context;
961 			pkt->pkt_tx_compl_callbacks =
962 			    m->m_pkthdr.pkt_compl_callbacks;
963 			/*
964 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
965 			 * mbuf can no longer trigger a completion callback.
966 			 * callback will be invoked when the kernel packet is
967 			 * completed.
968 			 */
969 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
970 
971 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
972 		}
973 
974 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
975 		if (ts_tag != NULL) {
976 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
977 		}
978 
979 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
980 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
981 		    sk_proc_name_address(current_proc()),
982 		    sk_proc_pid(current_proc()), len,
983 		    (copysum ? (len - start) : 0), csum, start);
984 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
985 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
986 		    SK_KVA(m), m->m_pkthdr.csum_flags,
987 		    (uint32_t)m->m_pkthdr.csum_tx_start,
988 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
989 		break;
990 
991 	default:
992 		VERIFY(0);
993 		/* NOTREACHED */
994 		__builtin_unreachable();
995 	}
996 	METADATA_ADJUST_LEN(pkt, len, poff);
997 
998 	if (m->m_flags & M_BCAST) {
999 		__packet_set_link_broadcast(ph);
1000 	} else if (m->m_flags & M_MCAST) {
1001 		__packet_set_link_multicast(ph);
1002 	}
1003 
1004 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1005 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1006 	    (t == NR_RX) ? "RX" : "TX",
1007 	    sk_dump("buf", baddr, len, 128, NULL, 0));
1008 }
1009 
1010 /*
1011  * Like m_copydata_sum(), but works on a destination kernel packet.
1012  */
1013 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)1014 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1015     uint32_t len, boolean_t do_cscum)
1016 {
1017 	boolean_t needs_swap, started_on_odd = FALSE;
1018 	int off0 = soff;
1019 	uint32_t len0 = len;
1020 	struct mbuf *m0 = m;
1021 	uint32_t sum = 0, partial;
1022 	unsigned count0, count, odd, mlen_copied;
1023 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
1024 	uint16_t dbcnt = __packet_get_buflet_count(dph);
1025 	uint32_t dlim, dlen0;
1026 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1027 	kern_buflet_t dbuf = NULL, dbufp = NULL;
1028 
1029 	while (soff > 0) {
1030 		if (__improbable(m == NULL)) {
1031 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1032 			    __func__, m0, off0, len0);
1033 			/* NOTREACHED */
1034 			__builtin_unreachable();
1035 		}
1036 		if (soff < m->m_len) {
1037 			break;
1038 		}
1039 		soff -= m->m_len;
1040 		m = m->m_next;
1041 	}
1042 
1043 	if (__improbable(m == NULL)) {
1044 		panic("%s: invalid mbuf chain %p [off %d, len %d]",
1045 		    __func__, m0, off0, len0);
1046 		/* NOTREACHED */
1047 		__builtin_unreachable();
1048 	}
1049 
1050 	sbaddr = mtod(m, uint8_t *) + soff;
1051 	count = m->m_len - soff;
1052 	mlen_copied = 0;
1053 
1054 	while (len != 0) {
1055 		ASSERT(sbaddr == NULL || dbaddr == NULL);
1056 		if (sbaddr == NULL) {
1057 			soff = 0;
1058 			m = m->m_next;
1059 			if (__improbable(m == NULL)) {
1060 				panic("%s: invalid mbuf chain %p [off %d, "
1061 				    "len %d]", __func__, m0, off0, len0);
1062 				/* NOTREACHED */
1063 				__builtin_unreachable();
1064 			}
1065 			sbaddr = mtod(m, uint8_t *);
1066 			count = m->m_len;
1067 			mlen_copied = 0;
1068 		}
1069 
1070 		if (__improbable(count == 0)) {
1071 			sbaddr = NULL;
1072 			continue;
1073 		}
1074 
1075 		if (dbaddr == NULL) {
1076 			if (dbufp != NULL) {
1077 				__buflet_set_data_length(dbufp, dlen0);
1078 			}
1079 
1080 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1081 			if (__improbable(dbuf == NULL)) {
1082 				panic("%s: mbuf too large %p [off %d, "
1083 				    "len %d]", __func__, m0, off0, len0);
1084 				/* NOTREACHED */
1085 				__builtin_unreachable();
1086 			}
1087 			dbufp = dbuf;
1088 			dlim = __buflet_get_data_limit(dbuf) - doff;
1089 			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
1090 			dlen0 = dlim;
1091 			doff = 0;
1092 		}
1093 
1094 		count = MIN(count, (unsigned)len);
1095 		count0 = count = MIN(count, dlim);
1096 
1097 		if (!do_cscum) {
1098 			_pkt_copy(sbaddr, dbaddr, count);
1099 			sbaddr += count;
1100 			dbaddr += count;
1101 			goto skip_csum;
1102 		}
1103 
1104 		partial = 0;
1105 		if ((uintptr_t)sbaddr & 1) {
1106 			/* Align on word boundary */
1107 			started_on_odd = !started_on_odd;
1108 #if BYTE_ORDER == LITTLE_ENDIAN
1109 			partial = *sbaddr << 8;
1110 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1111 			partial = *sbaddr;
1112 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1113 			*dbaddr++ = *sbaddr++;
1114 			count -= 1;
1115 		}
1116 
1117 		needs_swap = started_on_odd;
1118 		odd = count & 1u;
1119 		count -= odd;
1120 
1121 		if (count) {
1122 			partial = __packet_copy_and_sum(sbaddr,
1123 			    dbaddr, count, partial);
1124 			sbaddr += count;
1125 			dbaddr += count;
1126 			if (__improbable(partial & 0xc0000000)) {
1127 				if (needs_swap) {
1128 					partial = (partial << 8) +
1129 					    (partial >> 24);
1130 				}
1131 				sum += (partial >> 16);
1132 				sum += (partial & 0xffff);
1133 				partial = 0;
1134 			}
1135 		}
1136 
1137 		if (odd) {
1138 #if BYTE_ORDER == LITTLE_ENDIAN
1139 			partial += *sbaddr;
1140 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1141 			partial += *sbaddr << 8;
1142 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1143 			*dbaddr++ = *sbaddr++;
1144 			started_on_odd = !started_on_odd;
1145 		}
1146 
1147 		if (needs_swap) {
1148 			partial = (partial << 8) + (partial >> 24);
1149 		}
1150 		sum += (partial >> 16) + (partial & 0xffff);
1151 		/*
1152 		 * Reduce sum to allow potential byte swap
1153 		 * in the next iteration without carry.
1154 		 */
1155 		sum = (sum >> 16) + (sum & 0xffff);
1156 
1157 skip_csum:
1158 		dlim -= count0;
1159 		len -= count0;
1160 		mlen_copied += count0;
1161 
1162 		if (dlim == 0) {
1163 			dbaddr = NULL;
1164 		}
1165 
1166 		count = m->m_len - soff - mlen_copied;
1167 		if (count == 0) {
1168 			sbaddr = NULL;
1169 		}
1170 	}
1171 
1172 	ASSERT(len == 0);
1173 	ASSERT(dbuf != NULL);
1174 	__buflet_set_data_length(dbuf, (dlen0 - dlim));
1175 
1176 	if (!do_cscum) {
1177 		return 0;
1178 	}
1179 
1180 	/* Final fold (reduce 32-bit to 16-bit) */
1181 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1182 	sum = (sum >> 16) + (sum & 0xffff);
1183 	return sum;
1184 }
1185 
1186 /*
1187  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1188  *
1189  * start/stuff is relative to moff, within [0, len], such that
1190  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1191  */
1192 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1193 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1194     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1195     const uint32_t len, const boolean_t copysum, const uint16_t start)
1196 {
1197 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1198 	struct m_tag *ts_tag = NULL;
1199 	uint32_t partial;
1200 	uint16_t csum = 0;
1201 	uint8_t *baddr;
1202 
1203 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1204 
1205 	/* get buffer address from packet */
1206 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1207 	ASSERT(baddr != NULL);
1208 	baddr += poff;
1209 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1210 	    __packet_get_buflet_count(ph)));
1211 
1212 	switch (t) {
1213 	case NR_RX:
1214 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1215 		pkt->pkt_csum_rx_start_off = 0;
1216 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1217 		pkt->pkt_svc_class = m_get_service_class(m);
1218 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1219 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
1220 			/*
1221 			 * Use m_copydata() to copy the portion up to the
1222 			 * point where we need to start the checksum, and
1223 			 * copy the remainder, checksumming as we go.
1224 			 */
1225 			if (start != 0) {
1226 				m_copydata(m, moff, start, baddr);
1227 			}
1228 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1229 			    (len - start), TRUE);
1230 			csum = __packet_fold_sum(partial);
1231 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1232 			    start, csum, FALSE);
1233 			METADATA_ADJUST_LEN(pkt, start, poff);
1234 		} else {
1235 			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1236 		}
1237 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1238 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1239 		    sk_proc_name_address(current_proc()),
1240 		    sk_proc_pid(current_proc()), len,
1241 		    (copysum ? (len - start) : 0), csum, start);
1242 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1243 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
1244 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1245 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1246 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1247 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1248 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1249 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1250 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1251 		    (uint32_t)pkt->pkt_csum_rx_value);
1252 		break;
1253 
1254 	case NR_TX:
1255 		if (copysum) {
1256 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1257 			/*
1258 			 * Use m_copydata() to copy the portion up to the
1259 			 * point where we need to start the checksum, and
1260 			 * copy the remainder, checksumming as we go.
1261 			 */
1262 			if (start != 0) {
1263 				m_copydata(m, moff, start, baddr);
1264 			}
1265 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1266 			    (len - start), TRUE);
1267 			csum = __packet_fold_sum_final(partial);
1268 
1269 			/*
1270 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1271 			 * ideally we'd only test for CSUM_ZERO_INVERT
1272 			 * here, but catch cases where the originator
1273 			 * did not set it for UDP.
1274 			 */
1275 			if (csum == 0 && (m->m_pkthdr.csum_flags &
1276 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1277 				csum = 0xffff;
1278 			}
1279 
1280 			/* Insert checksum into packet */
1281 			ASSERT(stuff <= (len - sizeof(csum)));
1282 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1283 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1284 			} else {
1285 				bcopy((void *)&csum, baddr + stuff,
1286 				    sizeof(csum));
1287 			}
1288 			METADATA_ADJUST_LEN(pkt, start, poff);
1289 		} else {
1290 			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1291 		}
1292 		pkt->pkt_csum_flags = 0;
1293 		pkt->pkt_csum_tx_start_off = 0;
1294 		pkt->pkt_csum_tx_stuff_off = 0;
1295 
1296 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1297 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
1298 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1299 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1300 		}
1301 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1302 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
1303 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1304 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1305 		}
1306 		if (!copysum) {
1307 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
1308 		}
1309 
1310 		/* translate mbuf metadata */
1311 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1312 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1313 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1314 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1315 		switch (m->m_pkthdr.pkt_proto) {
1316 		case IPPROTO_QUIC:
1317 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1318 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
1319 			break;
1320 
1321 		default:
1322 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1323 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1324 			break;
1325 		}
1326 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1327 		pkt->pkt_svc_class = m_get_service_class(m);
1328 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1329 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1330 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1331 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1332 		}
1333 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1334 			pkt->pkt_pflags |= PKT_F_L4S;
1335 		}
1336 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1337 		pkt->pkt_policy_id =
1338 		    (uint32_t)necp_get_policy_id_from_packet(m);
1339 		pkt->pkt_skip_policy_id =
1340 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
1341 
1342 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1343 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1344 				__packet_set_tx_completion_data(ph,
1345 				    m->m_pkthdr.drv_tx_compl_arg,
1346 				    m->m_pkthdr.drv_tx_compl_data);
1347 			}
1348 			pkt->pkt_tx_compl_context =
1349 			    m->m_pkthdr.pkt_compl_context;
1350 			pkt->pkt_tx_compl_callbacks =
1351 			    m->m_pkthdr.pkt_compl_callbacks;
1352 			/*
1353 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1354 			 * mbuf can no longer trigger a completion callback.
1355 			 * callback will be invoked when the kernel packet is
1356 			 * completed.
1357 			 */
1358 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1359 
1360 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1361 		}
1362 
1363 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
1364 		if (ts_tag != NULL) {
1365 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
1366 		}
1367 
1368 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1369 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1370 		    sk_proc_name_address(current_proc()),
1371 		    sk_proc_pid(current_proc()), len,
1372 		    (copysum ? (len - start) : 0), csum, start);
1373 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1374 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
1375 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1376 		    (uint32_t)m->m_pkthdr.csum_tx_start,
1377 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
1378 		break;
1379 
1380 	default:
1381 		VERIFY(0);
1382 		/* NOTREACHED */
1383 		__builtin_unreachable();
1384 	}
1385 
1386 	if (m->m_flags & M_BCAST) {
1387 		__packet_set_link_broadcast(ph);
1388 	} else if (m->m_flags & M_MCAST) {
1389 		__packet_set_link_multicast(ph);
1390 	}
1391 
1392 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1393 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1394 	    (t == NR_RX) ? "RX" : "TX",
1395 	    sk_dump("buf", baddr, len, 128, NULL, 0));
1396 }
1397 
1398 static inline uint32_t
_convert_pkt_csum_flags(uint32_t pkt_flags)1399 _convert_pkt_csum_flags(uint32_t pkt_flags)
1400 {
1401 	uint32_t mbuf_flags = 0;
1402 	if (pkt_flags & PACKET_CSUM_TCP) {
1403 		mbuf_flags |= CSUM_TCP;
1404 	}
1405 	if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1406 		mbuf_flags |= CSUM_TCPIPV6;
1407 	}
1408 	if (pkt_flags & PACKET_CSUM_UDP) {
1409 		mbuf_flags |= CSUM_UDP;
1410 	}
1411 	if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1412 		mbuf_flags |= CSUM_UDPIPV6;
1413 	}
1414 	if (pkt_flags & PACKET_CSUM_IP) {
1415 		mbuf_flags |= CSUM_IP;
1416 	}
1417 	if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1418 		mbuf_flags |= CSUM_ZERO_INVERT;
1419 	}
1420 	if (pkt_flags & PACKET_CSUM_TSO_IPV4) {
1421 		mbuf_flags |= CSUM_TSO_IPV4;
1422 	}
1423 	if (pkt_flags & PACKET_CSUM_TSO_IPV6) {
1424 		mbuf_flags |= CSUM_TSO_IPV6;
1425 	}
1426 
1427 	return mbuf_flags;
1428 }
1429 
1430 /*
1431  * This routine is used for copying from a packet originating from a native
1432  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1433  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1434  *
1435  * We do adjust the length to reflect the total data span.
1436  *
1437  * This routine supports copying into an mbuf chain for RX but not TX.
1438  *
1439  * start/stuff is relative to poff, within [0, len], such that
1440  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1441  */
1442 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1443 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1444     struct mbuf *m, const uint16_t moff, const uint32_t len,
1445     const boolean_t copysum, const uint16_t start)
1446 {
1447 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1448 	struct mbuf *curr_m;
1449 	uint32_t partial = 0;
1450 	uint32_t remaining_len = len, copied_len = 0;
1451 	uint16_t csum = 0;
1452 	uint8_t *baddr;
1453 	uint8_t *dp;
1454 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1455 
1456 	ASSERT(len >= start);
1457 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1458 
1459 	/* get buffer address from packet */
1460 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1461 	ASSERT(baddr != NULL);
1462 	baddr += poff;
1463 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1464 
1465 	ASSERT((m->m_flags & M_PKTHDR));
1466 	m->m_data += moff;
1467 
1468 	switch (t) {
1469 	case NR_RX:
1470 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1471 
1472 		/*
1473 		 * Use pkt_copy() to copy the portion up to the
1474 		 * point where we need to start the checksum, and
1475 		 * copy the remainder, checksumming as we go.
1476 		 */
1477 		if (__probable(do_sum && start != 0)) {
1478 			ASSERT(M_TRAILINGSPACE(m) >= start);
1479 			ASSERT(m->m_len == 0);
1480 			dp = (uint8_t *)m_mtod_current(m);
1481 			_pkt_copy(baddr, dp, start);
1482 			remaining_len -= start;
1483 			copied_len += start;
1484 			m->m_len += start;
1485 			m->m_pkthdr.len += start;
1486 		}
1487 		curr_m = m;
1488 		while (curr_m != NULL && remaining_len != 0) {
1489 			uint32_t tmp_len = MIN(remaining_len,
1490 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1491 			dp = (uint8_t *)m_mtod_end(curr_m);
1492 			if (__probable(do_sum)) {
1493 				partial = __packet_copy_and_sum((baddr + copied_len),
1494 				    dp, tmp_len, partial);
1495 			} else {
1496 				_pkt_copy((baddr + copied_len), dp, tmp_len);
1497 			}
1498 
1499 			curr_m->m_len += tmp_len;
1500 			m->m_pkthdr.len += tmp_len;
1501 			copied_len += tmp_len;
1502 			remaining_len -= tmp_len;
1503 			curr_m = curr_m->m_next;
1504 		}
1505 		ASSERT(remaining_len == 0);
1506 
1507 		if (__probable(do_sum)) {
1508 			csum = __packet_fold_sum(partial);
1509 
1510 			m->m_pkthdr.csum_flags |=
1511 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1512 			m->m_pkthdr.csum_rx_start = start;
1513 			m->m_pkthdr.csum_rx_val = csum;
1514 		} else {
1515 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1516 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1517 			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1518 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1519 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1520 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1521 			}
1522 		}
1523 
1524 		/* translate packet metadata */
1525 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1526 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1527 
1528 		m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1529 
1530 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1531 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1532 		    sk_proc_name_address(current_proc()),
1533 		    sk_proc_pid(current_proc()), len,
1534 		    (copysum ? (len - start) : 0), csum, start);
1535 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1536 		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1537 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1538 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1539 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1540 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1541 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1542 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1543 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1544 		    (uint32_t)pkt->pkt_csum_rx_value);
1545 		break;
1546 
1547 	case NR_TX:
1548 		dp = (uint8_t *)m_mtod_current(m);
1549 		ASSERT(m->m_next == NULL);
1550 
1551 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1552 		    (uint32_t)mbuf_maxlen(m));
1553 		m->m_len += len;
1554 		m->m_pkthdr.len += len;
1555 		VERIFY(m->m_len == m->m_pkthdr.len &&
1556 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1557 
1558 		if (copysum) {
1559 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1560 			/*
1561 			 * Use pkt_copy() to copy the portion up to the
1562 			 * point where we need to start the checksum, and
1563 			 * copy the remainder, checksumming as we go.
1564 			 */
1565 			if (__probable(start != 0)) {
1566 				_pkt_copy(baddr, dp, start);
1567 			}
1568 			partial = __packet_copy_and_sum((baddr + start),
1569 			    (dp + start), (len - start), 0);
1570 			csum = __packet_fold_sum_final(partial);
1571 
1572 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1573 			if (csum == 0 &&
1574 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1575 				csum = 0xffff;
1576 			}
1577 
1578 			/* Insert checksum into packet */
1579 			ASSERT(stuff <= (len - sizeof(csum)));
1580 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1581 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1582 			} else {
1583 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1584 			}
1585 		} else {
1586 			_pkt_copy(baddr, dp, len);
1587 		}
1588 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1589 		m->m_pkthdr.csum_tx_start = 0;
1590 		m->m_pkthdr.csum_tx_stuff = 0;
1591 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1592 
1593 		/* translate packet metadata */
1594 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1595 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1596 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1597 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1598 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1599 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1600 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1601 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1602 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1603 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1604 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1605 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1606 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1607 		}
1608 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1609 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1610 		}
1611 		if (__improbable(copy_pkt_tx_time != 0 &&
1612 		    (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1613 			struct m_tag *tag = NULL;
1614 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1615 			    sizeof(uint64_t), M_WAITOK, m);
1616 			if (tag != NULL) {
1617 				m_tag_prepend(m, tag);
1618 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1619 			}
1620 		}
1621 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1622 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1623 
1624 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1625 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1626 		    sk_proc_name_address(current_proc()),
1627 		    sk_proc_pid(current_proc()), len,
1628 		    (copysum ? (len - start) : 0), csum, start);
1629 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1630 		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1631 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1632 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1633 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1634 		break;
1635 
1636 	default:
1637 		VERIFY(0);
1638 		/* NOTREACHED */
1639 		__builtin_unreachable();
1640 	}
1641 
1642 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1643 		m->m_flags |= M_BCAST;
1644 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1645 		m->m_flags |= M_MCAST;
1646 	}
1647 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1648 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1649 	    (t == NR_RX) ? "RX" : "TX",
1650 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128, NULL, 0));
1651 }
1652 
1653 /*
1654  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1655  * NOTE: poff is the offset within the packet.
1656  *
1657  * This routine supports copying into an mbuf chain for RX but not TX.
1658  *
1659  * start/stuff is relative to poff, within [0, len], such that
1660  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1661  */
1662 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1663 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1664     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1665     const uint32_t len, const boolean_t copysum, const uint16_t start)
1666 {
1667 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1668 	struct mbuf *curr_m;
1669 	uint32_t partial = 0;
1670 	uint32_t remaining_len = len, copied_len = 0;
1671 	uint16_t csum = 0;
1672 	uint8_t *baddr;
1673 	uint8_t *dp;
1674 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1675 
1676 	ASSERT(len >= start);
1677 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1678 
1679 	/* get buffer address from packet */
1680 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1681 	ASSERT(baddr != NULL);
1682 	baddr += poff;
1683 
1684 	ASSERT((m->m_flags & M_PKTHDR));
1685 	m->m_data += moff;
1686 
1687 	switch (t) {
1688 	case NR_RX:
1689 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1690 		if (__probable(do_sum && start != 0)) {
1691 			ASSERT(M_TRAILINGSPACE(m) >= start);
1692 			ASSERT(m->m_len == 0);
1693 			dp = (uint8_t *)m_mtod_current(m);
1694 			_pkt_copy(baddr, dp, start);
1695 			remaining_len -= start;
1696 			copied_len += start;
1697 			m->m_len += start;
1698 			m->m_pkthdr.len += start;
1699 		}
1700 		curr_m = m;
1701 		while (curr_m != NULL && remaining_len != 0) {
1702 			uint32_t tmp_len = MIN(remaining_len,
1703 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1704 			uint16_t soff = poff + (uint16_t)copied_len;
1705 			dp = (uint8_t *)m_mtod_end(curr_m);
1706 
1707 			if (__probable(do_sum)) {
1708 				partial = _pkt_copyaddr_sum(ph, soff,
1709 				    dp, tmp_len, TRUE, partial, NULL);
1710 			} else {
1711 				pkt_copyaddr_sum(ph, soff,
1712 				    dp, tmp_len, FALSE, 0, NULL);
1713 			}
1714 
1715 			curr_m->m_len += tmp_len;
1716 			m->m_pkthdr.len += tmp_len;
1717 			copied_len += tmp_len;
1718 			remaining_len -= tmp_len;
1719 			curr_m = curr_m->m_next;
1720 		}
1721 		ASSERT(remaining_len == 0);
1722 
1723 		if (__probable(do_sum)) {
1724 			csum = __packet_fold_sum(partial);
1725 
1726 			m->m_pkthdr.csum_flags |=
1727 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1728 			m->m_pkthdr.csum_rx_start = start;
1729 			m->m_pkthdr.csum_rx_val = csum;
1730 		} else {
1731 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1732 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1733 			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1734 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1735 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1736 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1737 			}
1738 		}
1739 
1740 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1741 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1742 
1743 		/* translate packet metadata */
1744 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1745 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1746 
1747 		m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1748 
1749 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1750 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1751 		    sk_proc_name_address(current_proc()),
1752 		    sk_proc_pid(current_proc()), len,
1753 		    (copysum ? (len - start) : 0), csum, start);
1754 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1755 		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1756 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1757 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1758 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1759 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1760 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1761 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1762 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1763 		    (uint32_t)pkt->pkt_csum_rx_value);
1764 		break;
1765 	case NR_TX:
1766 		ASSERT(len <= M16KCLBYTES);
1767 		dp = (uint8_t *)m_mtod_current(m);
1768 		ASSERT(m->m_next == NULL);
1769 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1770 		    (uint32_t)mbuf_maxlen(m));
1771 		m->m_len += len;
1772 		m->m_pkthdr.len += len;
1773 		VERIFY(m->m_len == m->m_pkthdr.len &&
1774 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1775 		if (copysum) {
1776 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1777 			/*
1778 			 * Use pkt_copy() to copy the portion up to the
1779 			 * point where we need to start the checksum, and
1780 			 * copy the remainder, checksumming as we go.
1781 			 */
1782 			if (__probable(start != 0)) {
1783 				_pkt_copy(baddr, dp, start);
1784 			}
1785 			partial = _pkt_copyaddr_sum(ph, (poff + start),
1786 			    (dp + start), (len - start), TRUE, 0, NULL);
1787 			csum = __packet_fold_sum_final(partial);
1788 
1789 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1790 			if (csum == 0 &&
1791 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1792 				csum = 0xffff;
1793 			}
1794 
1795 			/* Insert checksum into packet */
1796 			ASSERT(stuff <= (len - sizeof(csum)));
1797 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1798 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1799 			} else {
1800 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1801 			}
1802 		} else {
1803 			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1804 		}
1805 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1806 		m->m_pkthdr.csum_tx_start = 0;
1807 		m->m_pkthdr.csum_tx_stuff = 0;
1808 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1809 
1810 		/* translate packet metadata */
1811 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1812 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1813 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1814 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1815 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1816 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1817 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1818 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1819 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1820 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1821 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1822 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1823 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1824 		}
1825 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1826 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1827 		}
1828 		if (__improbable(copy_pkt_tx_time != 0 &&
1829 		    (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1830 			struct m_tag *tag = NULL;
1831 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1832 			    sizeof(uint64_t), M_WAITOK, m);
1833 			if (tag != NULL) {
1834 				m_tag_prepend(m, tag);
1835 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1836 			}
1837 		}
1838 
1839 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1840 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1841 		    sk_proc_name_address(current_proc()),
1842 		    sk_proc_pid(current_proc()), len,
1843 		    (copysum ? (len - start) : 0), csum, start);
1844 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1845 		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1846 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1847 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1848 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1849 		break;
1850 
1851 	default:
1852 		VERIFY(0);
1853 		/* NOTREACHED */
1854 		__builtin_unreachable();
1855 	}
1856 
1857 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1858 		m->m_flags |= M_BCAST;
1859 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1860 		m->m_flags |= M_MCAST;
1861 	}
1862 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1863 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1864 	    (t == NR_RX) ? "RX" : "TX",
1865 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128, NULL, 0));
1866 }
1867 
1868 /*
1869  * Like m_copydata(), but computes 16-bit sum as the data is copied.
1870  * Caller can provide an initial sum to be folded into the computed
1871  * sum.  The accumulated partial sum (32-bit) is returned to caller;
1872  * caller is responsible for further reducing it to 16-bit if needed,
1873  * as well as to perform the final 1's complement on it.
1874  */
uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len)vp, uint32_t initial_sum,
    boolean_t *odd_start)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = off, len0 = len;	/* originals, kept for panic diagnostics */
	struct mbuf *m0 = m;
	uint64_t sum, partial;
	unsigned count, odd;
	char *cp = vp;

	if (__improbable(off < 0 || len < 0)) {
		panic("%s: invalid offset %d or len %d", __func__, off, len);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* Walk the chain to the mbuf containing the starting offset. */
	while (off > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (off < m->m_len) {
			break;
		}
		off -= m->m_len;
		m = m->m_next;
	}

	/* Resume odd/even byte phase carried over from a previous call. */
	if (odd_start) {
		started_on_odd = *odd_start;
	}
	sum = initial_sum;

	/* Copy and sum len0 bytes, one mbuf segment per iteration. */
	for (; len0 > 0; m = m->m_next) {
		uint8_t *datap;

		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		datap = mtod(m, uint8_t *) + off;
		count = m->m_len;

		/* Skip empty mbufs (off must be 0 here to reach them). */
		if (__improbable(count == 0)) {
			continue;
		}

		count = MIN(count - off, (unsigned)len0);
		partial = 0;

		if ((uintptr_t)datap & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *datap << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *datap;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			count -= 1;
			len0 -= 1;
		}

		/*
		 * If this segment starts at an odd byte of the overall
		 * stream, its 16-bit words are byte-swapped relative to
		 * the accumulated sum; remember to swap 'partial' back
		 * before folding it in.
		 */
		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			partial = __packet_copy_and_sum(datap,
			    cp, count, (uint32_t)partial);
			datap += count;
			cp += count;
			len0 -= count;
			/*
			 * Fold 'partial' into 'sum' early if the top bits
			 * are set, so further accumulation cannot overflow
			 * the 64-bit accumulator and drop carries.
			 */
			if (__improbable((partial & (3ULL << 62)) != 0)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}

		/* Trailing odd byte: add it and flip the byte phase. */
		if (odd) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *datap;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *datap << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			len0 -= 1;
			started_on_odd = !started_on_odd;
		}
		off = 0;	/* only the first segment has a nonzero offset */

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	/* Hand the final odd/even phase back to the caller. */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	/* Final fold (reduce 64-bit to 32-bit) */
	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */

	/* return 32-bit partial sum to caller */
	return (uint32_t)sum;
}
1999 
2000 #if DEBUG || DEVELOPMENT
2001 #define TRAILERS_MAX    16              /* max trailing bytes */
2002 #define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
2003 static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
2004 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
2005 
2006 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)2007 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
2008 {
2009 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
2010 	uint32_t extra;
2011 	uint8_t *baddr;
2012 
2013 	/* get buffer address from packet */
2014 	MD_BUFLET_ADDR_ABS(pkt, baddr);
2015 	ASSERT(baddr != NULL);
2016 	ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
2017 
2018 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2019 	if (extra == 0 || extra > sizeof(tb) ||
2020 	    (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
2021 		return 0;
2022 	}
2023 
2024 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2025 	if (regen++ == TRAILERS_REGEN) {
2026 		read_frandom(&tb[0], sizeof(tb));
2027 		regen = 0;
2028 	}
2029 
2030 	bcopy(&tb[0], (baddr + len), extra);
2031 
2032 	/* recompute partial sum (also to exercise related logic) */
2033 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
2034 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
2035 	    ((len + extra) - start), 0);
2036 	pkt->pkt_csum_rx_start_off = start;
2037 
2038 	return extra;
2039 }
2040 
2041 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)2042 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
2043 {
2044 	uint32_t extra;
2045 
2046 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2047 	if (extra == 0 || extra > sizeof(tb)) {
2048 		return 0;
2049 	}
2050 
2051 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2052 		return 0;
2053 	}
2054 
2055 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2056 	if (regen++ == TRAILERS_REGEN) {
2057 		read_frandom(&tb[0], sizeof(tb));
2058 		regen = 0;
2059 	}
2060 
2061 	/* recompute partial sum (also to exercise related logic) */
2062 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2063 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2064 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2065 	m->m_pkthdr.csum_rx_start = start;
2066 
2067 	return extra;
2068 }
2069 #endif /* DEBUG || DEVELOPMENT */
2070 
/*
 * Exported wrapper around _pkt_copypkt_sum(): copy 'len' bytes from
 * offset 'soff' of packet 'sph' to offset 'doff' of packet 'dph',
 * with 'partial'/'do_csum' forwarded for checksum accumulation.
 * The internal copy must succeed; VERIFY panics if it returns FALSE.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
2077 
2078 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * __sized_by (len)dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)2079 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
2080     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
2081 {
2082 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
2083 }
2084 
2085 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)2086 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2087     uint16_t len, boolean_t do_cscum)
2088 {
2089 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2090 }
2091 
2092 void
pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)2093 pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
2094 {
2095 	return _pkt_copy(src, dst, len);
2096 }
2097