/* xref: /xnu-12377.41.6/bsd/skywalk/packet/packet_copy.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828) */
1 /*
2  * Copyright (c) 2017-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
#if (DEVELOPMENT || DEBUG)

/*
 * per-packet logging is wasteful in release: COPY_LOG is only defined
 * in DEVELOPMENT/DEBUG builds, so the "#if COPY_LOG" blocks below
 * evaluate to 0 (compiled out) in release builds.
 */
#define COPY_LOG 1

SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
/* when non-zero, test knob: append trailing bytes to copied packets */
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* DEVELOPMENT || DEBUG */
44 
45 
46 __attribute__((always_inline))
47 static inline void
_pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)48 _pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
49 {
50 	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
51 		switch (len) {
52 		case 20:        /* standard IPv4 header */
53 			sk_copy64_20(src, dst);
54 			return;
55 
56 		case 40:        /* IPv6 header */
57 			sk_copy64_40(src, dst);
58 			return;
59 
60 		default:
61 			if (IS_P2ALIGNED(len, 64)) {
62 				sk_copy64_64x(src, dst, len);
63 				return;
64 			} else if (IS_P2ALIGNED(len, 32)) {
65 				sk_copy64_32x(src, dst, len);
66 				return;
67 			} else if (IS_P2ALIGNED(len, 8)) {
68 				sk_copy64_8x(src, dst, len);
69 				return;
70 			} else if (IS_P2ALIGNED(len, 4)) {
71 				sk_copy64_4x(src, dst, len);
72 				return;
73 			}
74 			break;
75 		}
76 	}
77 	bcopy(src, dst, len);
78 }
79 
/*
 * This routine is used for copying data across two kernel packets.
 * Can also optionally compute 16-bit partial inet checksum as the
 * data is copied.
 * This routine is used by flowswitch while copying packet from vp
 * adapter pool to packet in native netif pool and vice-a-versa.
 *
 * start/stuff is relative to soff, within [0, len], such that
 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
 *
 * t        NR_RX or NR_TX; selects which checksum metadata is set up.
 * dph/doff destination packet handle and byte offset of the copy.
 * sph/soff source packet handle and byte offset of the copy.
 * len      number of bytes to copy.
 * copysum  if TRUE, fold a 16-bit inet checksum into the copy.
 * start    offset (relative to soff/doff) where checksumming begins.
 * stuff    TX only: offset at which the computed checksum is stored.
 * invert   TX only: RFC1122 4.1.3.4 zero-checksum inversion (UDP).
 */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;
	/* skip software checksumming when source already has a full csum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	static_assert(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet routine: the whole span must fit in the first buffer */
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* RX keeps a partial (not finalized) checksum */
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* plain copy; propagate source's RX checksum state */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

#if COPY_LOG
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
		    len, (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "   pkt  %p doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
#endif
		break;

	case NR_TX:
		if (copysum) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* TX wants the finalized (fully folded) checksum */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			/* prefer an aligned 16-bit store over bcopy */
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* carry over only TSO/offload request flags to destination */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

#if COPY_LOG
		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
#endif
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* account for the bytes now present in the destination's first buflet */
	METADATA_ADJUST_LEN(dpkt, len, doff);

#if COPY_LOG
	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128));
#endif
}
210 
/*
 * Copy len bytes starting at packet offset soff into the flat buffer
 * dbaddr, walking the source packet's buflet chain, optionally
 * accumulating a 16-bit inet checksum over the copied bytes.
 *
 * NOTE: soff is the offset within the packet
 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
 * caller is responsible for further reducing it to 16-bit if needed,
 * as well as to perform the final 1's complement on it.
 *
 * odd_start (in/out, may be NULL) carries the "ended on an odd byte"
 * state across successive calls so a sum split over multiple calls
 * keeps correct byte ordering.
 */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;            /* original length, for panic report */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t sbcnt, off0 = soff;    /* original offset, for panic report */
	uint32_t clen, sboff, sblen;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;

		clen = (uint16_t)MIN(len, sblen);

		/*
		 * NOTE(review): this fastpath ignores *odd_start and returns
		 * after copying MIN(len, sblen) bytes — presumably callers
		 * guarantee an even-byte starting state and a buflet covering
		 * the full len; confirm against call sites.
		 */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* not eligible; fall back to the general path below */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	while (len != 0) {
		/* advance to the next buflet; a short chain is fatal */
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, %p [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
		soff = 0;               /* offset applies to first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				/*
				 * -fbounds-safety: *dbaddr++ = *sbaddr++ fails
				 * to compile. But the following works. Also,
				 * grouping dbaddr and len updates led to higher
				 * throughput performance, compared to doing
				 * dbaddr++; sbaddr++; len -= 1; in that order.
				 */
				*dbaddr = *sbaddr;
				dbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
				sbaddr++;
			}
			needs_swap = started_on_odd;

			/* split off a trailing odd byte; handled after copy */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* drain partial before a byte swap could overflow it */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		/*
		 * Updating len before updating sbaddr led to faster throughput
		 * than doing: dbaddr += clen; sbaddr += clen;
		 * len -= clen + odd;
		 */
		len -= clen;
		sblen -= clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				ASSERT(odd == 1);
				/*
				 * -fbounds-safety: Not written as `*dbaddr++ = *sbaddr++`
				 * to avoid compiler bug (rdar://98749526). This
				 * bug is only fixed when using `bound-checks-new-checks`.
				 */
				*dbaddr = *sbaddr++;
				dbaddr++;
				len -= 1;
				sblen -= 1;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}
		/* each iteration must exhaust the buflet or the request */
		ASSERT(sblen == 0 || len == 0);
	}

	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
376 
/*
 * Copy len bytes from source packet sph (starting at soff) into
 * destination packet dph (starting at doff), walking both buflet
 * chains, optionally accumulating a 16-bit inet checksum.
 * Returns TRUE iff all len bytes were copied.
 *
 * NOTE: Caller of this function is responsible to adjust the length and offset
 * of the first buflet of the destination packet if (doff != 0),
 * i.e. additional data is being prependend to the packet.
 * It should also finalize the packet.
 * To simplify & optimize the routine, we have also assumed that soff & doff
 * will lie within the first buffer, which is true for the current use cases
 * where, doff is the offset of the checksum field in the TCP/IP header and
 * soff is the L3 offset.
 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
 * caller is responsible for further reducing it to 16-bit if needed,
 * as well as to perform the final 1's complement on it.
 */
static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
{
	uint8_t odd = 0;
	uint32_t sum = 0, partial;
	boolean_t needs_swap, started_on_odd = FALSE;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t sbcnt, dbcnt;
	uint32_t clen, dlen0, sboff, sblen, dlim;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;

	ASSERT(csum_partial != NULL || !do_csum);
	sbcnt = __packet_get_buflet_count(sph);
	dbcnt = __packet_get_buflet_count(dph);

	while (len != 0) {
		/* at most one side needs a fresh buflet per iteration */
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		if (sbaddr == NULL) {
			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
			if (__improbable(sbuf == NULL)) {
				break;  /* source exhausted; len != 0 → FALSE */
			}
			sbufp = sbuf;
			sblen = __buflet_get_data_length(sbuf);
			sboff = __buflet_get_data_offset(sbuf);
			ASSERT(soff >= sboff);
			ASSERT(sboff + sblen > soff);
			sblen -= (soff - sboff);
			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
			soff = 0;       /* offset applies to first buflet only */
		}

		if (dbaddr == NULL) {
			/* finalize the previous destination buflet's length */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				break;  /* destination exhausted → FALSE */
			}
			dbufp = dbuf;
			dlim = __buflet_get_data_limit(dbuf);
			ASSERT(dlim > doff);
			dlim -= doff;
			if (doff != 0) {
				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
			}
			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
			dlen0 = dlim;   /* remember capacity for final length */
			doff = 0;       /* offset applies to first buflet only */
		}

		/* copy the largest span both buflets can satisfy */
		clen = MIN(len, sblen);
		clen = MIN(clen, dlim);

		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				/*
				 * NOTE(review): sblen is not reduced for this
				 * alignment byte here, although the sibling
				 * routine _pkt_copyaddr_sum() does `sblen -= 1`
				 * in its equivalent prologue — verify source
				 * buflet accounting for a misaligned start.
				 */
				*dbaddr++ = *sbaddr++;
				clen -= 1;
				dlim -= 1;
				len -= 1;
			}
			needs_swap = started_on_odd;

			/* split off a trailing odd byte; handled after copy */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* drain partial before a byte swap could overflow it */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}
		sbaddr += clen;
		dbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		dlim -= clen + odd;
		len -= clen + odd;

		if (sblen == 0) {
			sbaddr = NULL;  /* fetch next source buflet */
		}

		if (dlim == 0) {
			dbaddr = NULL;  /* fetch next destination buflet */
		}
	}

	/* set the number of bytes actually written to the last buflet */
	if (__probable(dbuf != NULL)) {
		__buflet_set_data_length(dbuf, (dlen0 - dlim));
	}
	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
		*csum_partial = (uint32_t)sum;
	}
	return len == 0;
}
535 
536 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)537 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
538 {
539 	uint8_t odd = 0;
540 	uint32_t sum = 0, partial;
541 	boolean_t needs_swap, started_on_odd = FALSE;
542 	uint8_t *sbaddr = NULL;
543 	uint16_t sbcnt;
544 	uint32_t clen, sblen, sboff;
545 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
546 	kern_buflet_t sbuf = NULL, sbufp = NULL;
547 
548 	sbcnt = __packet_get_buflet_count(sph);
549 
550 	/* fastpath (single buflet, even aligned, even length) */
551 	if (sbcnt == 1 && len != 0) {
552 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
553 		ASSERT(sbuf != NULL);
554 		sblen = __buflet_get_data_length(sbuf);
555 		sboff = __buflet_get_data_offset(sbuf);
556 		ASSERT(soff >= sboff);
557 		ASSERT(sboff + sblen > soff);
558 		sblen -= (soff - sboff);
559 		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
560 
561 		clen = MIN(len, sblen);
562 
563 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
564 			sum = __packet_cksum(sbaddr, clen, 0);
565 			return __packet_fold_sum(sum);
566 		}
567 
568 		sbaddr = NULL;
569 		sbuf = sbufp = NULL;
570 	}
571 
572 	/* slowpath */
573 	while (len != 0) {
574 		ASSERT(sbaddr == NULL);
575 		if (sbaddr == NULL) {
576 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
577 			if (__improbable(sbuf == NULL)) {
578 				break;
579 			}
580 			sbufp = sbuf;
581 			sblen = __buflet_get_data_length(sbuf);
582 			sboff = __buflet_get_data_offset(sbuf);
583 			ASSERT(soff >= sboff);
584 			ASSERT(sboff + sblen > soff);
585 			sblen -= (soff - sboff);
586 			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
587 			soff = 0;
588 		}
589 
590 		clen = MIN(len, sblen);
591 
592 		partial = 0;
593 		if (__improbable((uintptr_t)sbaddr & 1)) {
594 			/* Align on word boundary */
595 			started_on_odd = !started_on_odd;
596 #if BYTE_ORDER == LITTLE_ENDIAN
597 			partial = (uint8_t)*sbaddr << 8;
598 #else /* BYTE_ORDER != LITTLE_ENDIAN */
599 			partial = (uint8_t)*sbaddr;
600 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
601 			clen -= 1;
602 			len -= 1;
603 		}
604 		needs_swap = started_on_odd;
605 
606 		odd = clen & 1u;
607 		clen -= odd;
608 
609 		if (clen != 0) {
610 			partial = __packet_cksum(sbaddr,
611 			    clen, partial);
612 		}
613 
614 		if (__improbable(partial & 0xc0000000)) {
615 			if (needs_swap) {
616 				partial = (partial << 8) +
617 				    (partial >> 24);
618 			}
619 			sum += (partial >> 16);
620 			sum += (partial & 0xffff);
621 			partial = 0;
622 		}
623 		sbaddr += clen;
624 
625 		if (odd != 0) {
626 #if BYTE_ORDER == LITTLE_ENDIAN
627 			partial += (uint8_t)*sbaddr;
628 #else /* BYTE_ORDER != LITTLE_ENDIAN */
629 			partial += (uint8_t)*sbaddr << 8;
630 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
631 			started_on_odd = !started_on_odd;
632 		}
633 
634 		if (needs_swap) {
635 			partial = (partial << 8) + (partial >> 24);
636 		}
637 		sum += (partial >> 16) + (partial & 0xffff);
638 		/*
639 		 * Reduce sum to allow potential byte swap
640 		 * in the next iteration without carry.
641 		 */
642 		sum = (sum >> 16) + (sum & 0xffff);
643 
644 		sblen -= clen + odd;
645 		len -= clen + odd;
646 
647 		if (sblen == 0) {
648 			sbaddr = NULL;
649 		}
650 	}
651 
652 	/* Final fold (reduce 32-bit to 16-bit) */
653 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
654 	sum = (sum >> 16) + (sum & 0xffff);
655 	return (uint32_t)sum;
656 }
657 
658 
/*
 * This is a multi-buflet variant of pkt_copy_from_pkt().
 *
 * start/stuff is relative to soff, within [0, len], such that
 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
 */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	/* skip software checksumming when source already has a full csum */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	/* destination must have room for doff+len across all its buflets */
	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			/*
			 * NOTE(review): unlike the TX path below, the return
			 * value of this copy+sum is not asserted — confirm
			 * whether a short copy is acceptable here.
			 */
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			/* RX keeps a partial (not finalized) checksum */
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/* account for the leading `start' bytes in buflet 0 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			/* propagate the source's RX checksum state */
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}
		break;

	case NR_TX:
		if (copysum) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			/* TX wants the finalized (fully folded) checksum */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			/* prefer an aligned 16-bit store over bcopy */
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			/* account for the leading `start' bytes in buflet 0 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* carry over only TSO/offload request flags to destination */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

#if COPY_LOG
		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
		    len, (copysum ? (len - start) : 0), csum, start,
		    dpkt->pkt_csum_flags);
#endif
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
779 
780 static inline uint32_t
_convert_mbuf_csum_flags(uint32_t mbuf_flags)781 _convert_mbuf_csum_flags(uint32_t mbuf_flags)
782 {
783 	uint32_t pkt_flags = 0;
784 
785 	if (mbuf_flags & CSUM_TCP) {
786 		pkt_flags |= PACKET_CSUM_TCP;
787 	}
788 	if (mbuf_flags & CSUM_TCPIPV6) {
789 		pkt_flags |= PACKET_CSUM_TCPIPV6;
790 	}
791 	if (mbuf_flags & CSUM_UDP) {
792 		pkt_flags |= PACKET_CSUM_UDP;
793 	}
794 	if (mbuf_flags & CSUM_UDPIPV6) {
795 		pkt_flags |= PACKET_CSUM_UDPIPV6;
796 	}
797 	if (mbuf_flags & CSUM_IP) {
798 		pkt_flags |= PACKET_CSUM_IP;
799 	}
800 	if (mbuf_flags & CSUM_ZERO_INVERT) {
801 		pkt_flags |= PACKET_CSUM_ZERO_INVERT;
802 	}
803 
804 	return pkt_flags;
805 }
806 
807 /*
808  * This routine is used for copying an mbuf which originated in the host
809  * stack destined to a native skywalk interface (NR_TX), as well as for
810  * mbufs originating on compat network interfaces (NR_RX).
811  *
812  * start/stuff is relative to moff, within [0, len], such that
813  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
814  */
815 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)816 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
817     struct mbuf *m, const uint16_t moff, const uint32_t len,
818     const boolean_t copysum, const uint16_t start)
819 {
820 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
821 	struct m_tag *ts_tag = NULL;
822 	uint32_t partial;
823 	uint16_t csum = 0;
824 	uint16_t vlan = 0;
825 	uint8_t *baddr;
826 
827 	static_assert(sizeof(csum) == sizeof(uint16_t));
828 
829 	/* get buffer address from packet */
830 	MD_BUFLET_ADDR_ABS(pkt, baddr);
831 	ASSERT(baddr != NULL);
832 	baddr += poff;
833 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
834 
835 	switch (t) {
836 	case NR_RX:
837 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
838 		pkt->pkt_csum_rx_start_off = 0;
839 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
840 		pkt->pkt_svc_class = m_get_service_class(m);
841 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
842 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
843 			/*
844 			 * Use m_copydata() to copy the portion up to the
845 			 * point where we need to start the checksum, and
846 			 * copy the remainder, checksumming as we go.
847 			 */
848 			if (start != 0) {
849 				m_copydata(m, moff, start, baddr);
850 			}
851 			partial = m_copydata_sum(m, start, (len - start),
852 			    (baddr + start), 0, NULL);
853 			csum = __packet_fold_sum(partial);
854 
855 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
856 			    start, csum, FALSE);
857 		} else {
858 			m_copydata(m, moff, len, baddr);
859 		}
860 
861 		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
862 			__packet_set_vlan_tag(ph, vlan);
863 		}
864 
865 #if COPY_LOG
866 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
867 		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
868 		    len, (copysum ? (len - start) : 0), csum, start);
869 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
870 		    "   mbuf %p csumf/rxstart/rxval 0x%x/%u/0x%04x",
871 		    SK_KVA(m), m->m_pkthdr.csum_flags,
872 		    (uint32_t)m->m_pkthdr.csum_rx_start,
873 		    (uint32_t)m->m_pkthdr.csum_rx_val);
874 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
875 		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
876 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
877 		    (uint32_t)pkt->pkt_csum_rx_start_off,
878 		    (uint32_t)pkt->pkt_csum_rx_value);
879 #endif
880 		break;
881 
882 	case NR_TX:
883 		if (copysum) {
884 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
885 			/*
886 			 * Use m_copydata() to copy the portion up to the
887 			 * point where we need to start the checksum, and
888 			 * copy the remainder, checksumming as we go.
889 			 */
890 			if (start != 0) {
891 				m_copydata(m, moff, start, baddr);
892 			}
893 			partial = m_copydata_sum(m, start, (len - start),
894 			    (baddr + start), 0, NULL);
895 			csum = __packet_fold_sum_final(partial);
896 
897 			/*
898 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
899 			 * ideally we'd only test for CSUM_ZERO_INVERT
900 			 * here, but catch cases where the originator
901 			 * did not set it for UDP.
902 			 */
903 			if (csum == 0 && (m->m_pkthdr.csum_flags &
904 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
905 				csum = 0xffff;
906 			}
907 
908 			/* Insert checksum into packet */
909 			ASSERT(stuff <= (len - sizeof(csum)));
910 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
911 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
912 			} else {
913 				bcopy((void *)&csum, baddr + stuff,
914 				    sizeof(csum));
915 			}
916 		} else {
917 			m_copydata(m, moff, len, baddr);
918 		}
919 		pkt->pkt_csum_flags = 0;
920 		pkt->pkt_csum_tx_start_off = 0;
921 		pkt->pkt_csum_tx_stuff_off = 0;
922 
923 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
924 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
925 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
926 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
927 		}
928 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
929 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
930 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
931 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
932 		}
933 		if (!copysum) {
934 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
935 		}
936 
937 		/* translate mbuf metadata */
938 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
939 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
940 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
941 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
942 		switch (m->m_pkthdr.pkt_proto) {
943 		case IPPROTO_QUIC:
944 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
945 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
946 			break;
947 
948 		default:
949 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
950 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
951 			break;
952 		}
953 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
954 		pkt->pkt_svc_class = m_get_service_class(m);
955 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
956 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
957 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
958 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
959 		}
960 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_LPW) != 0) {
961 			pkt->pkt_pflags |= __PKT_F_LPW;
962 		}
963 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
964 			pkt->pkt_pflags |= PKT_F_L4S;
965 		}
966 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
967 		pkt->pkt_policy_id =
968 		    (uint32_t)necp_get_policy_id_from_packet(m);
969 		pkt->pkt_skip_policy_id =
970 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
971 
972 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
973 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
974 				__packet_set_tx_completion_data(ph,
975 				    m->m_pkthdr.drv_tx_compl_arg,
976 				    m->m_pkthdr.drv_tx_compl_data);
977 			}
978 			pkt->pkt_tx_compl_context =
979 			    m->m_pkthdr.pkt_compl_context;
980 			pkt->pkt_tx_compl_callbacks =
981 			    m->m_pkthdr.pkt_compl_callbacks;
982 			/*
983 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
984 			 * mbuf can no longer trigger a completion callback.
985 			 * callback will be invoked when the kernel packet is
986 			 * completed.
987 			 */
988 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
989 
990 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
991 		}
992 
993 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
994 		if (ts_tag != NULL) {
995 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
996 		}
997 
998 		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
999 			__packet_set_vlan_tag(ph, vlan);
1000 		}
1001 
1002 #if COPY_LOG
1003 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1004 		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1005 		    len, (copysum ? (len - start) : 0), csum, start);
1006 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1007 		    "   mbuf %p csumf/txstart/txstuff 0x%x/%u/%u",
1008 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1009 		    (uint32_t)m->m_pkthdr.csum_tx_start,
1010 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
1011 #endif
1012 		break;
1013 
1014 	default:
1015 		VERIFY(0);
1016 		/* NOTREACHED */
1017 		__builtin_unreachable();
1018 	}
1019 	METADATA_ADJUST_LEN(pkt, len, poff);
1020 
1021 	if (m->m_flags & M_BCAST) {
1022 		__packet_set_link_broadcast(ph);
1023 	} else if (m->m_flags & M_MCAST) {
1024 		__packet_set_link_multicast(ph);
1025 	}
1026 
1027 #if COPY_LOG
1028 	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1029 	    (t == NR_RX) ? "RX" : "TX", sk_dump("buf", baddr, len, 128));
1030 #endif
1031 }
1032 
1033 /*
1034  * Like m_copydata_sum(), but works on a destination kernel packet.
1035  */
1036 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)1037 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1038     uint32_t len, boolean_t do_cscum)
1039 {
1040 	boolean_t needs_swap, started_on_odd = FALSE;
1041 	int off0 = soff;
1042 	uint32_t len0 = len;
1043 	struct mbuf *m0 = m;
1044 	uint32_t sum = 0, partial;
1045 	unsigned count0, count, odd, mlen_copied;
1046 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
1047 	uint16_t dbcnt = __packet_get_buflet_count(dph);
1048 	uint32_t dlim, dlen0;
1049 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1050 	kern_buflet_t dbuf = NULL, dbufp = NULL;
1051 
1052 	while (soff > 0) {
1053 		if (__improbable(m == NULL)) {
1054 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1055 			    __func__, m0, off0, len0);
1056 			/* NOTREACHED */
1057 			__builtin_unreachable();
1058 		}
1059 		if (soff < m->m_len) {
1060 			break;
1061 		}
1062 		soff -= m->m_len;
1063 		m = m->m_next;
1064 	}
1065 
1066 	if (__improbable(m == NULL)) {
1067 		panic("%s: invalid mbuf chain %p [off %d, len %d]",
1068 		    __func__, m0, off0, len0);
1069 		/* NOTREACHED */
1070 		__builtin_unreachable();
1071 	}
1072 
1073 	sbaddr = mtod(m, uint8_t *) + soff;
1074 	count = m->m_len - soff;
1075 	mlen_copied = 0;
1076 
1077 	while (len != 0) {
1078 		ASSERT(sbaddr == NULL || dbaddr == NULL);
1079 		if (sbaddr == NULL) {
1080 			soff = 0;
1081 			m = m->m_next;
1082 			if (__improbable(m == NULL)) {
1083 				panic("%s: invalid mbuf chain %p [off %d, "
1084 				    "len %d]", __func__, m0, off0, len0);
1085 				/* NOTREACHED */
1086 				__builtin_unreachable();
1087 			}
1088 			sbaddr = mtod(m, uint8_t *);
1089 			count = m->m_len;
1090 			mlen_copied = 0;
1091 		}
1092 
1093 		if (__improbable(count == 0)) {
1094 			sbaddr = NULL;
1095 			continue;
1096 		}
1097 
1098 		if (dbaddr == NULL) {
1099 			if (dbufp != NULL) {
1100 				__buflet_set_data_length(dbufp, dlen0);
1101 			}
1102 
1103 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1104 			if (__improbable(dbuf == NULL)) {
1105 				panic("%s: mbuf too large %p [off %d, "
1106 				    "len %d]", __func__, m0, off0, len0);
1107 				/* NOTREACHED */
1108 				__builtin_unreachable();
1109 			}
1110 			dbufp = dbuf;
1111 			dlim = __buflet_get_data_limit(dbuf) - doff;
1112 			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
1113 			dlen0 = dlim;
1114 			doff = 0;
1115 		}
1116 
1117 		count = MIN(count, (unsigned)len);
1118 		count0 = count = MIN(count, dlim);
1119 
1120 		if (!do_cscum) {
1121 			_pkt_copy(sbaddr, dbaddr, count);
1122 			sbaddr += count;
1123 			dbaddr += count;
1124 			goto skip_csum;
1125 		}
1126 
1127 		partial = 0;
1128 		if ((uintptr_t)sbaddr & 1) {
1129 			/* Align on word boundary */
1130 			started_on_odd = !started_on_odd;
1131 #if BYTE_ORDER == LITTLE_ENDIAN
1132 			partial = *sbaddr << 8;
1133 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1134 			partial = *sbaddr;
1135 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1136 			*dbaddr++ = *sbaddr++;
1137 			count -= 1;
1138 		}
1139 
1140 		needs_swap = started_on_odd;
1141 		odd = count & 1u;
1142 		count -= odd;
1143 
1144 		if (count) {
1145 			partial = __packet_copy_and_sum(sbaddr,
1146 			    dbaddr, count, partial);
1147 			sbaddr += count;
1148 			dbaddr += count;
1149 			if (__improbable(partial & 0xc0000000)) {
1150 				if (needs_swap) {
1151 					partial = (partial << 8) +
1152 					    (partial >> 24);
1153 				}
1154 				sum += (partial >> 16);
1155 				sum += (partial & 0xffff);
1156 				partial = 0;
1157 			}
1158 		}
1159 
1160 		if (odd) {
1161 #if BYTE_ORDER == LITTLE_ENDIAN
1162 			partial += *sbaddr;
1163 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1164 			partial += *sbaddr << 8;
1165 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1166 			*dbaddr++ = *sbaddr++;
1167 			started_on_odd = !started_on_odd;
1168 		}
1169 
1170 		if (needs_swap) {
1171 			partial = (partial << 8) + (partial >> 24);
1172 		}
1173 		sum += (partial >> 16) + (partial & 0xffff);
1174 		/*
1175 		 * Reduce sum to allow potential byte swap
1176 		 * in the next iteration without carry.
1177 		 */
1178 		sum = (sum >> 16) + (sum & 0xffff);
1179 
1180 skip_csum:
1181 		dlim -= count0;
1182 		len -= count0;
1183 		mlen_copied += count0;
1184 
1185 		if (dlim == 0) {
1186 			dbaddr = NULL;
1187 		}
1188 
1189 		count = m->m_len - soff - mlen_copied;
1190 		if (count == 0) {
1191 			sbaddr = NULL;
1192 		}
1193 	}
1194 
1195 	ASSERT(len == 0);
1196 	ASSERT(dbuf != NULL);
1197 	__buflet_set_data_length(dbuf, (dlen0 - dlim));
1198 
1199 	if (!do_cscum) {
1200 		return 0;
1201 	}
1202 
1203 	/* Final fold (reduce 32-bit to 16-bit) */
1204 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1205 	sum = (sum >> 16) + (sum & 0xffff);
1206 	return sum;
1207 }
1208 
1209 /*
1210  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1211  *
1212  * start/stuff is relative to moff, within [0, len], such that
1213  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1214  */
1215 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1216 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1217     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1218     const uint32_t len, const boolean_t copysum, const uint16_t start)
1219 {
1220 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1221 	struct m_tag *ts_tag = NULL;
1222 	uint32_t partial;
1223 	uint16_t csum = 0;
1224 	uint16_t vlan = 0;
1225 	uint8_t *baddr;
1226 
1227 	static_assert(sizeof(csum) == sizeof(uint16_t));
1228 
1229 	/* get buffer address from packet */
1230 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1231 	ASSERT(baddr != NULL);
1232 	baddr += poff;
1233 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1234 	    __packet_get_buflet_count(ph)));
1235 
1236 	switch (t) {
1237 	case NR_RX:
1238 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1239 		pkt->pkt_csum_rx_start_off = 0;
1240 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1241 		pkt->pkt_svc_class = m_get_service_class(m);
1242 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1243 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
1244 			/*
1245 			 * Use m_copydata() to copy the portion up to the
1246 			 * point where we need to start the checksum, and
1247 			 * copy the remainder, checksumming as we go.
1248 			 */
1249 			if (start != 0) {
1250 				m_copydata(m, moff, start, baddr);
1251 			}
1252 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1253 			    (len - start), TRUE);
1254 			csum = __packet_fold_sum(partial);
1255 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1256 			    start, csum, FALSE);
1257 			METADATA_ADJUST_LEN(pkt, start, poff);
1258 		} else {
1259 			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1260 		}
1261 
1262 		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
1263 			__packet_set_vlan_tag(ph, vlan);
1264 		}
1265 
1266 #if COPY_LOG
1267 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1268 		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1269 		    len, (copysum ? (len - start) : 0), csum, start);
1270 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1271 		    "   mbuf %p csumf/rxstart/rxval 0x%x/%u/0x%04x",
1272 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1273 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1274 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1275 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1276 		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1277 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1278 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1279 		    (uint32_t)pkt->pkt_csum_rx_value);
1280 #endif
1281 		break;
1282 
1283 	case NR_TX:
1284 		if (copysum) {
1285 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1286 			/*
1287 			 * Use m_copydata() to copy the portion up to the
1288 			 * point where we need to start the checksum, and
1289 			 * copy the remainder, checksumming as we go.
1290 			 */
1291 			if (start != 0) {
1292 				m_copydata(m, moff, start, baddr);
1293 			}
1294 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1295 			    (len - start), TRUE);
1296 			csum = __packet_fold_sum_final(partial);
1297 
1298 			/*
1299 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1300 			 * ideally we'd only test for CSUM_ZERO_INVERT
1301 			 * here, but catch cases where the originator
1302 			 * did not set it for UDP.
1303 			 */
1304 			if (csum == 0 && (m->m_pkthdr.csum_flags &
1305 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1306 				csum = 0xffff;
1307 			}
1308 
1309 			/* Insert checksum into packet */
1310 			ASSERT(stuff <= (len - sizeof(csum)));
1311 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1312 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1313 			} else {
1314 				bcopy((void *)&csum, baddr + stuff,
1315 				    sizeof(csum));
1316 			}
1317 			METADATA_ADJUST_LEN(pkt, start, poff);
1318 		} else {
1319 			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1320 		}
1321 		pkt->pkt_csum_flags = 0;
1322 		pkt->pkt_csum_tx_start_off = 0;
1323 		pkt->pkt_csum_tx_stuff_off = 0;
1324 
1325 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1326 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
1327 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1328 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1329 		}
1330 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1331 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
1332 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1333 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1334 		}
1335 		if (!copysum) {
1336 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
1337 		}
1338 
1339 		/* translate mbuf metadata */
1340 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1341 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1342 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1343 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1344 		switch (m->m_pkthdr.pkt_proto) {
1345 		case IPPROTO_QUIC:
1346 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1347 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
1348 			break;
1349 
1350 		default:
1351 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1352 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1353 			break;
1354 		}
1355 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1356 		pkt->pkt_svc_class = m_get_service_class(m);
1357 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1358 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1359 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1360 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1361 		}
1362 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_LPW) != 0) {
1363 			pkt->pkt_pflags |= __PKT_F_LPW;
1364 		}
1365 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1366 			pkt->pkt_pflags |= PKT_F_L4S;
1367 		}
1368 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1369 		pkt->pkt_policy_id =
1370 		    (uint32_t)necp_get_policy_id_from_packet(m);
1371 		pkt->pkt_skip_policy_id =
1372 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
1373 
1374 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1375 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1376 				__packet_set_tx_completion_data(ph,
1377 				    m->m_pkthdr.drv_tx_compl_arg,
1378 				    m->m_pkthdr.drv_tx_compl_data);
1379 			}
1380 			pkt->pkt_tx_compl_context =
1381 			    m->m_pkthdr.pkt_compl_context;
1382 			pkt->pkt_tx_compl_callbacks =
1383 			    m->m_pkthdr.pkt_compl_callbacks;
1384 			/*
1385 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1386 			 * mbuf can no longer trigger a completion callback.
1387 			 * callback will be invoked when the kernel packet is
1388 			 * completed.
1389 			 */
1390 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1391 
1392 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1393 		}
1394 
1395 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
1396 		if (ts_tag != NULL) {
1397 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
1398 		}
1399 
1400 		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
1401 			__packet_set_vlan_tag(ph, vlan);
1402 		}
1403 
1404 #if COPY_LOG
1405 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1406 		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1407 		    len, (copysum ? (len - start) : 0), csum, start);
1408 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1409 		    "   mbuf %p csumf/txstart/txstuff 0x%x/%u/%u",
1410 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1411 		    (uint32_t)m->m_pkthdr.csum_tx_start,
1412 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
1413 #endif
1414 		break;
1415 
1416 	default:
1417 		VERIFY(0);
1418 		/* NOTREACHED */
1419 		__builtin_unreachable();
1420 	}
1421 
1422 	if (m->m_flags & M_BCAST) {
1423 		__packet_set_link_broadcast(ph);
1424 	} else if (m->m_flags & M_MCAST) {
1425 		__packet_set_link_multicast(ph);
1426 	}
1427 
1428 #if COPY_LOG
1429 	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1430 	    (t == NR_RX) ? "RX" : "TX", sk_dump("buf", baddr, len, 128));
1431 #endif
1432 }
1433 
1434 static inline uint32_t
_convert_pkt_csum_flags(uint32_t pkt_flags)1435 _convert_pkt_csum_flags(uint32_t pkt_flags)
1436 {
1437 	uint32_t mbuf_flags = 0;
1438 	if (pkt_flags & PACKET_CSUM_TCP) {
1439 		mbuf_flags |= CSUM_TCP;
1440 	}
1441 	if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1442 		mbuf_flags |= CSUM_TCPIPV6;
1443 	}
1444 	if (pkt_flags & PACKET_CSUM_UDP) {
1445 		mbuf_flags |= CSUM_UDP;
1446 	}
1447 	if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1448 		mbuf_flags |= CSUM_UDPIPV6;
1449 	}
1450 	if (pkt_flags & PACKET_CSUM_IP) {
1451 		mbuf_flags |= CSUM_IP;
1452 	}
1453 	if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1454 		mbuf_flags |= CSUM_ZERO_INVERT;
1455 	}
1456 	if (pkt_flags & PACKET_CSUM_TSO_IPV4) {
1457 		mbuf_flags |= CSUM_TSO_IPV4;
1458 	}
1459 	if (pkt_flags & PACKET_CSUM_TSO_IPV6) {
1460 		mbuf_flags |= CSUM_TSO_IPV6;
1461 	}
1462 
1463 	return mbuf_flags;
1464 }
1465 
1466 /*
1467  * This routine is used for copying from a packet originating from a native
1468  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1469  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1470  *
1471  * We do adjust the length to reflect the total data span.
1472  *
1473  * This routine supports copying into an mbuf chain for RX but not TX.
1474  *
1475  * start/stuff is relative to poff, within [0, len], such that
1476  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1477  */
1478 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1479 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1480     struct mbuf *m, const uint16_t moff, const uint32_t len,
1481     const boolean_t copysum, const uint16_t start)
1482 {
1483 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1484 	struct mbuf *curr_m;
1485 	uint32_t partial = 0;
1486 	uint32_t remaining_len = len, copied_len = 0;
1487 	uint16_t csum = 0;
1488 	uint16_t vlan = 0;
1489 	uint8_t *baddr;
1490 	uint8_t *dp;
1491 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1492 
1493 	ASSERT(len >= start);
1494 	static_assert(sizeof(csum) == sizeof(uint16_t));
1495 
1496 	/* get buffer address from packet */
1497 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1498 	ASSERT(baddr != NULL);
1499 	baddr += poff;
1500 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1501 
1502 	ASSERT((m->m_flags & M_PKTHDR));
1503 	m->m_data += moff;
1504 
1505 	switch (t) {
1506 	case NR_RX:
1507 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1508 
1509 		/*
1510 		 * Use pkt_copy() to copy the portion up to the
1511 		 * point where we need to start the checksum, and
1512 		 * copy the remainder, checksumming as we go.
1513 		 */
1514 		if (__probable(do_sum && start != 0)) {
1515 			ASSERT(M_TRAILINGSPACE(m) >= start);
1516 			ASSERT(m->m_len == 0);
1517 			dp = (uint8_t *)m_mtod_current(m);
1518 			_pkt_copy(baddr, dp, start);
1519 			remaining_len -= start;
1520 			copied_len += start;
1521 			m->m_len += start;
1522 			m->m_pkthdr.len += start;
1523 		}
1524 		curr_m = m;
1525 		while (curr_m != NULL && remaining_len != 0) {
1526 			uint32_t tmp_len = MIN(remaining_len,
1527 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1528 			dp = (uint8_t *)m_mtod_end(curr_m);
1529 			if (__probable(do_sum)) {
1530 				partial = __packet_copy_and_sum((baddr + copied_len),
1531 				    dp, tmp_len, partial);
1532 			} else {
1533 				_pkt_copy((baddr + copied_len), dp, tmp_len);
1534 			}
1535 
1536 			curr_m->m_len += tmp_len;
1537 			m->m_pkthdr.len += tmp_len;
1538 			copied_len += tmp_len;
1539 			remaining_len -= tmp_len;
1540 			curr_m = curr_m->m_next;
1541 		}
1542 		ASSERT(remaining_len == 0);
1543 
1544 		if (__probable(do_sum)) {
1545 			csum = __packet_fold_sum(partial);
1546 
1547 			m->m_pkthdr.csum_flags |=
1548 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1549 			m->m_pkthdr.csum_rx_start = start;
1550 			m->m_pkthdr.csum_rx_val = csum;
1551 		} else {
1552 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1553 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1554 			static_assert(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1555 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1556 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1557 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1558 			}
1559 		}
1560 
1561 		/* translate packet metadata */
1562 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1563 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1564 
1565 		m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1566 
1567 		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1568 			mbuf_set_vlan_tag(m, vlan);
1569 		}
1570 
1571 #if COPY_LOG
1572 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1573 		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1574 		    len, (copysum ? (len - start) : 0), csum, start);
1575 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1576 		    "   mbuf %p moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1577 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1578 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1579 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1580 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1581 		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1582 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1583 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1584 		    (uint32_t)pkt->pkt_csum_rx_value);
1585 #endif
1586 		break;
1587 
1588 	case NR_TX:
1589 		dp = (uint8_t *)m_mtod_current(m);
1590 		ASSERT(m->m_next == NULL);
1591 
1592 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1593 		    (uint32_t)mbuf_maxlen(m));
1594 		m->m_len += len;
1595 		m->m_pkthdr.len += len;
1596 		VERIFY(m->m_len == m->m_pkthdr.len &&
1597 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1598 
1599 		if (copysum) {
1600 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1601 			/*
1602 			 * Use pkt_copy() to copy the portion up to the
1603 			 * point where we need to start the checksum, and
1604 			 * copy the remainder, checksumming as we go.
1605 			 */
1606 			if (__probable(start != 0)) {
1607 				_pkt_copy(baddr, dp, start);
1608 			}
1609 			partial = __packet_copy_and_sum((baddr + start),
1610 			    (dp + start), (len - start), 0);
1611 			csum = __packet_fold_sum_final(partial);
1612 
1613 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1614 			if (csum == 0 &&
1615 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1616 				csum = 0xffff;
1617 			}
1618 
1619 			/* Insert checksum into packet */
1620 			ASSERT(stuff <= (len - sizeof(csum)));
1621 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1622 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1623 			} else {
1624 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1625 			}
1626 		} else {
1627 			_pkt_copy(baddr, dp, len);
1628 		}
1629 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1630 		m->m_pkthdr.csum_tx_start = 0;
1631 		m->m_pkthdr.csum_tx_stuff = 0;
1632 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1633 
1634 		/* translate packet metadata */
1635 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1636 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1637 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1638 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1639 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1640 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1641 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1642 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1643 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1644 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1645 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1646 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1647 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1648 		}
1649 		if ((pkt->pkt_pflags & __PKT_F_LPW) != 0) {
1650 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_LPW;
1651 		}
1652 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1653 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1654 		}
1655 		if ((pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
1656 			struct m_tag *tag = NULL;
1657 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1658 			    sizeof(uint64_t), M_WAITOK, m);
1659 			if (tag != NULL) {
1660 				m_tag_prepend(m, tag);
1661 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1662 			}
1663 		}
1664 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1665 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1666 
1667 		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1668 			mbuf_set_vlan_tag(m, vlan);
1669 		}
1670 
1671 #if COPY_LOG
1672 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1673 		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1674 		    len, (copysum ? (len - start) : 0), csum, start);
1675 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1676 		    "   pkt  %p poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1677 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1678 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1679 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1680 #endif
1681 		break;
1682 
1683 	default:
1684 		VERIFY(0);
1685 		/* NOTREACHED */
1686 		__builtin_unreachable();
1687 	}
1688 
1689 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1690 		m->m_flags |= M_BCAST;
1691 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1692 		m->m_flags |= M_MCAST;
1693 	}
1694 #if COPY_LOG
1695 	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1696 	    (t == NR_RX) ? "RX" : "TX",
1697 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128));
1698 #endif
1699 }
1700 
1701 /*
1702  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1703  * NOTE: poff is the offset within the packet.
1704  *
1705  * This routine supports copying into an mbuf chain for RX but not TX.
1706  *
1707  * start/stuff is relative to poff, within [0, len], such that
1708  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1709  */
1710 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1711 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1712     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1713     const uint32_t len, const boolean_t copysum, const uint16_t start)
1714 {
1715 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1716 	struct mbuf *curr_m;
1717 	uint32_t partial = 0;
1718 	uint32_t remaining_len = len, copied_len = 0;
1719 	uint16_t csum = 0;
1720 	uint16_t vlan = 0;
1721 	uint8_t *baddr;
1722 	uint8_t *dp;
1723 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1724 
1725 	ASSERT(len >= start);
1726 	static_assert(sizeof(csum) == sizeof(uint16_t));
1727 
1728 	/* get buffer address from packet */
1729 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1730 	ASSERT(baddr != NULL);
1731 	baddr += poff;
1732 
1733 	ASSERT((m->m_flags & M_PKTHDR));
1734 	m->m_data += moff;
1735 
1736 	switch (t) {
1737 	case NR_RX:
1738 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1739 		if (__probable(do_sum && start != 0)) {
1740 			ASSERT(M_TRAILINGSPACE(m) >= start);
1741 			ASSERT(m->m_len == 0);
1742 			dp = (uint8_t *)m_mtod_current(m);
1743 			_pkt_copy(baddr, dp, start);
1744 			remaining_len -= start;
1745 			copied_len += start;
1746 			m->m_len += start;
1747 			m->m_pkthdr.len += start;
1748 		}
1749 		curr_m = m;
1750 		while (curr_m != NULL && remaining_len != 0) {
1751 			uint32_t tmp_len = MIN(remaining_len,
1752 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1753 			uint16_t soff = poff + (uint16_t)copied_len;
1754 			dp = (uint8_t *)m_mtod_end(curr_m);
1755 
1756 			if (__probable(do_sum)) {
1757 				partial = _pkt_copyaddr_sum(ph, soff,
1758 				    dp, tmp_len, TRUE, partial, NULL);
1759 			} else {
1760 				pkt_copyaddr_sum(ph, soff,
1761 				    dp, tmp_len, FALSE, 0, NULL);
1762 			}
1763 
1764 			curr_m->m_len += tmp_len;
1765 			m->m_pkthdr.len += tmp_len;
1766 			copied_len += tmp_len;
1767 			remaining_len -= tmp_len;
1768 			curr_m = curr_m->m_next;
1769 		}
1770 		ASSERT(remaining_len == 0);
1771 
1772 		if (__probable(do_sum)) {
1773 			csum = __packet_fold_sum(partial);
1774 
1775 			m->m_pkthdr.csum_flags |=
1776 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1777 			m->m_pkthdr.csum_rx_start = start;
1778 			m->m_pkthdr.csum_rx_val = csum;
1779 		} else {
1780 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1781 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1782 			static_assert(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1783 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1784 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1785 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1786 			}
1787 		}
1788 
1789 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1790 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1791 
1792 		/* translate packet metadata */
1793 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1794 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1795 
1796 		m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1797 
1798 		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1799 			mbuf_set_vlan_tag(m, vlan);
1800 		}
1801 
1802 #if COPY_LOG
1803 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1804 		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1805 		    len, (copysum ? (len - start) : 0), csum, start);
1806 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1807 		    "   mbuf %p moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1808 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1809 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1810 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1811 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1812 		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1813 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1814 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1815 		    (uint32_t)pkt->pkt_csum_rx_value);
1816 #endif
1817 		break;
1818 	case NR_TX:
1819 		ASSERT(len <= M16KCLBYTES);
1820 		dp = (uint8_t *)m_mtod_current(m);
1821 		ASSERT(m->m_next == NULL);
1822 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1823 		    (uint32_t)mbuf_maxlen(m));
1824 		m->m_len += len;
1825 		m->m_pkthdr.len += len;
1826 		VERIFY(m->m_len == m->m_pkthdr.len &&
1827 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1828 		if (copysum) {
1829 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1830 			/*
1831 			 * Use pkt_copy() to copy the portion up to the
1832 			 * point where we need to start the checksum, and
1833 			 * copy the remainder, checksumming as we go.
1834 			 */
1835 			if (__probable(start != 0)) {
1836 				_pkt_copy(baddr, dp, start);
1837 			}
1838 			partial = _pkt_copyaddr_sum(ph, (poff + start),
1839 			    (dp + start), (len - start), TRUE, 0, NULL);
1840 			csum = __packet_fold_sum_final(partial);
1841 
1842 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1843 			if (csum == 0 &&
1844 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1845 				csum = 0xffff;
1846 			}
1847 
1848 			/* Insert checksum into packet */
1849 			ASSERT(stuff <= (len - sizeof(csum)));
1850 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1851 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1852 			} else {
1853 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1854 			}
1855 		} else {
1856 			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1857 		}
1858 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1859 		m->m_pkthdr.csum_tx_start = 0;
1860 		m->m_pkthdr.csum_tx_stuff = 0;
1861 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1862 
1863 		/* translate packet metadata */
1864 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1865 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1866 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1867 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1868 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1869 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1870 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1871 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1872 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1873 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1874 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1875 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1876 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1877 		}
1878 		if ((pkt->pkt_pflags & __PKT_F_LPW) != 0) {
1879 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_LPW;
1880 		}
1881 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1882 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1883 		}
1884 		if ((pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
1885 			struct m_tag *tag = NULL;
1886 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1887 			    sizeof(uint64_t), M_WAITOK, m);
1888 			if (tag != NULL) {
1889 				m_tag_prepend(m, tag);
1890 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1891 			}
1892 		}
1893 
1894 		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1895 			mbuf_set_vlan_tag(m, vlan);
1896 		}
1897 
1898 #if COPY_LOG
1899 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1900 		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1901 		    len, (copysum ? (len - start) : 0), csum, start);
1902 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1903 		    "   pkt  %p poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1904 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1905 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1906 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1907 #endif
1908 		break;
1909 
1910 	default:
1911 		VERIFY(0);
1912 		/* NOTREACHED */
1913 		__builtin_unreachable();
1914 	}
1915 
1916 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1917 		m->m_flags |= M_BCAST;
1918 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1919 		m->m_flags |= M_MCAST;
1920 	}
1921 #if COPY_LOG
1922 	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1923 	    (t == NR_RX) ? "RX" : "TX",
1924 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128));
1925 #endif
1926 }
1927 
1928 /*
1929  * Like m_copydata(), but computes 16-bit sum as the data is copied.
1930  * Caller can provide an initial sum to be folded into the computed
1931  * sum.  The accumulated partial sum (32-bit) is returned to caller;
1932  * caller is responsible for further reducing it to 16-bit if needed,
1933  * as well as to perform the final 1's complement on it.
1934  */
uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len)vp, uint32_t initial_sum,
    boolean_t *odd_start)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = off, len0 = len;	/* off0 saved for panic messages; len0 is bytes remaining */
	struct mbuf *m0 = m;		/* head of chain, for panic messages */
	uint64_t sum, partial;
	unsigned count, odd;
	char *cp = vp;			/* destination write cursor */

	if (__improbable(off < 0 || len < 0)) {
		panic("%s: invalid offset %d or len %d", __func__, off, len);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * Skip mbufs that lie entirely before the requested offset;
	 * on loop exit, "off" is the residual offset into "m".
	 */
	while (off > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (off < m->m_len) {
			break;
		}
		off -= m->m_len;
		m = m->m_next;
	}

	/*
	 * Caller may carry the odd/even alignment state across calls
	 * (so a payload summed in multiple pieces folds correctly).
	 */
	if (odd_start) {
		started_on_odd = *odd_start;
	}
	sum = initial_sum;

	for (; len0 > 0; m = m->m_next) {
		uint8_t *datap;

		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		datap = mtod(m, uint8_t *) + off;
		count = m->m_len;

		/* tolerate zero-length mbufs in the chain */
		if (__improbable(count == 0)) {
			continue;
		}

		count = MIN(count - off, (unsigned)len0);
		partial = 0;

		if ((uintptr_t)datap & 1) {
			/*
			 * Copy one byte to word-align the source, crediting
			 * it to the correct byte lane of the 16-bit sum.
			 */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *datap << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *datap;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			count -= 1;
			len0 -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;	/* bulk loop below handles an even byte count */

		if (count) {
			/* copy+sum the aligned middle portion in one pass */
			partial = __packet_copy_and_sum(datap,
			    cp, count, (uint32_t)partial);
			datap += count;
			cp += count;
			len0 -= count;
			/*
			 * If the accumulated partial is close to overflowing
			 * its top bits, fold it into "sum" now (byte-swapping
			 * first when the data started on an odd boundary).
			 */
			if (__improbable((partial & (3ULL << 62)) != 0)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}

		if (odd) {
			/* trailing odd byte of this mbuf */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *datap;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *datap << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			len0 -= 1;
			started_on_odd = !started_on_odd;
		}
		off = 0;	/* only the first mbuf has a nonzero offset */

		if (needs_swap) {
			/* odd start: swap byte lanes before folding */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	/* hand the alignment state back to the caller */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	/* Final fold (reduce 64-bit to 32-bit) */
	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */

	/* return 32-bit partial sum to caller */
	return (uint32_t)sum;
}
2059 
2060 #if DEBUG || DEVELOPMENT
2061 #define TRAILERS_MAX    16              /* max trailing bytes */
2062 #define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
2063 static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
2064 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
2065 
2066 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)2067 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
2068 {
2069 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
2070 	uint32_t extra;
2071 	uint8_t *baddr;
2072 
2073 	/* get buffer address from packet */
2074 	MD_BUFLET_ADDR_ABS(pkt, baddr);
2075 	ASSERT(baddr != NULL);
2076 	ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
2077 
2078 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2079 	if (extra == 0 || extra > sizeof(tb) ||
2080 	    (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
2081 		return 0;
2082 	}
2083 
2084 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2085 	if (regen++ == TRAILERS_REGEN) {
2086 		read_frandom(&tb[0], sizeof(tb));
2087 		regen = 0;
2088 	}
2089 
2090 	bcopy(&tb[0], (baddr + len), extra);
2091 
2092 	/* recompute partial sum (also to exercise related logic) */
2093 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
2094 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
2095 	    ((len + extra) - start), 0);
2096 	pkt->pkt_csum_rx_start_off = start;
2097 
2098 	return extra;
2099 }
2100 
2101 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)2102 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
2103 {
2104 	uint32_t extra;
2105 
2106 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2107 	if (extra == 0 || extra > sizeof(tb)) {
2108 		return 0;
2109 	}
2110 
2111 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2112 		return 0;
2113 	}
2114 
2115 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2116 	if (regen++ == TRAILERS_REGEN) {
2117 		read_frandom(&tb[0], sizeof(tb));
2118 		regen = 0;
2119 	}
2120 
2121 	/* recompute partial sum (also to exercise related logic) */
2122 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2123 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2124 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2125 	m->m_pkthdr.csum_rx_start = start;
2126 
2127 	return extra;
2128 }
2129 #endif /* DEBUG || DEVELOPMENT */
2130 
2131 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)2132 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
2133     uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
2134 {
2135 	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
2136 }
2137 
2138 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * __sized_by (len)dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)2139 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
2140     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
2141 {
2142 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
2143 }
2144 
2145 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)2146 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2147     uint16_t len, boolean_t do_cscum)
2148 {
2149 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2150 }
2151 
2152 void
pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)2153 pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
2154 {
2155 	return _pkt_copy(src, dst, len);
2156 }
2157