xref: /xnu-12377.81.4/bsd/skywalk/packet/packet_copy.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2017-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
#if (DEVELOPMENT || DEBUG)

/* per-packet logging is wasteful in release */
/*
 * COPY_LOG is only defined on DEVELOPMENT/DEBUG kernels; the
 * `#if COPY_LOG` sites below evaluate to 0 when it is undefined,
 * so all per-packet SK_DF logging compiles out of release builds.
 */
#define COPY_LOG 1

SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
/* exposed as kern.skywalk.packet.trailers via the SYSCTL_INT below */
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
44 
45 
46 __attribute__((always_inline))
47 static inline void
_pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)48 _pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
49 {
50 	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
51 		switch (len) {
52 		case 20:        /* standard IPv4 header */
53 			sk_copy64_20(src, dst);
54 			return;
55 
56 		case 40:        /* IPv6 header */
57 			sk_copy64_40(src, dst);
58 			return;
59 
60 		default:
61 			if (IS_P2ALIGNED(len, 64)) {
62 				sk_copy64_64x(src, dst, len);
63 				return;
64 			} else if (IS_P2ALIGNED(len, 32)) {
65 				sk_copy64_32x(src, dst, len);
66 				return;
67 			} else if (IS_P2ALIGNED(len, 8)) {
68 				sk_copy64_8x(src, dst, len);
69 				return;
70 			} else if (IS_P2ALIGNED(len, 4)) {
71 				sk_copy64_4x(src, dst, len);
72 				return;
73 			}
74 			break;
75 		}
76 	}
77 	bcopy(src, dst, len);
78 }
79 
80 /*
81  * This routine is used for copying data across two kernel packets.
82  * Can also optionally compute 16-bit partial inet checksum as the
83  * data is copied.
84  * This routine is used by flowswitch while copying packet from vp
85  * adapter pool to packet in native netif pool and vice-a-versa.
86  *
87  * start/stuff is relative to soff, within [0, len], such that
88  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
89  */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;
	/*
	 * Only checksum while copying if the caller requested it AND the
	 * source packet does not already carry full checksum flags.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	static_assert(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* destination copy must fit within a single default-size buffer */
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* RX keeps a partial (not finalized) sum */
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* plain copy; propagate source RX checksum state */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

#if COPY_LOG
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
		    len, (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "   pkt  %p doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
#endif
		break;

	case NR_TX:
		if (copysum) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* TX finalizes (one's-complements) the sum */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* carry over only TSO/TX-offload checksum flags */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

#if COPY_LOG
		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
#endif
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* account the copied bytes in the destination metadata */
	METADATA_ADJUST_LEN(dpkt, len, doff);

#if COPY_LOG
	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128));
#endif
}
210 
211 /*
212  * NOTE: soff is the offset within the packet
213  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
214  * caller is responsible for further reducing it to 16-bit if needed,
215  * as well as to perform the final 1's complement on it.
216  */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;            /* original length, for panic message */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t sbcnt, off0 = soff;    /* original offset, for panic message */
	uint32_t clen, sboff, sblen;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry odd-byte phase over from a previous chunk, if supplied */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;

		clen = (uint16_t)MIN(len, sblen);

		/* fastpath applies only when no odd-byte fixups are needed */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* reset cursors and fall through to the general loop */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, %p [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
		soff = 0;       /* the offset applies to the first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				/*
				 * -fbounds-safety: *dbaddr++ = *sbaddr++ fails
				 * to compile. But the following works. Also,
				 * grouping dbaddr and len updates led to higher
				 * throughput performance, compared to doing
				 * dbaddr++; sbaddr++; len -= 1; in that order.
				 */
				*dbaddr = *sbaddr;
				dbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
				sbaddr++;
			}
			needs_swap = started_on_odd;

			/* sum an even byte count; odd tail handled below */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* reduce early so a later byte swap cannot overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		/*
		 * Updating len before updating sbaddr led to faster throughput
		 * than doing: dbaddr += clen; sbaddr += clen;
		 * len -= clen + odd;
		 */
		len -= clen;
		sblen -= clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the trailing odd byte of this buflet */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				ASSERT(odd == 1);
				/*
				 * -fbounds-safety: Not written as `*dbaddr++ = *sbaddr++`
				 * to avoid compiler bug (rdar://98749526). This
				 * bug is only fixed when using `bound-checks-new-checks`.
				 */
				*dbaddr = *sbaddr++;
				dbaddr++;
				len -= 1;
				sblen -= 1;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}
		/* each iteration exhausts the buflet or the request */
		ASSERT(sblen == 0 || len == 0);
	}

	/* report the final odd-byte phase back to the caller */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
376 
377 /*
378  * NOTE: Caller of this function is responsible to adjust the length and offset
379  * of the first buflet of the destination packet if (doff != 0),
380  * i.e. additional data is being prependend to the packet.
381  * It should also finalize the packet.
382  * To simplify & optimize the routine, we have also assumed that soff & doff
383  * will lie within the first buffer, which is true for the current use cases
384  * where, doff is the offset of the checksum field in the TCP/IP header and
385  * soff is the L3 offset.
386  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
387  * caller is responsible for further reducing it to 16-bit if needed,
388  * as well as to perform the final 1's complement on it.
389  */
static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
{
	uint8_t odd = 0;
	uint32_t sum = 0, partial;
	boolean_t needs_swap, started_on_odd = FALSE;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t sbcnt, dbcnt;
	uint32_t clen, dlen0, sboff, sblen, dlim;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;

	/* a partial-sum out parameter is required when checksumming */
	ASSERT(csum_partial != NULL || !do_csum);
	sbcnt = __packet_get_buflet_count(sph);
	dbcnt = __packet_get_buflet_count(dph);

	/*
	 * Walk source and destination buflet chains with independent
	 * cursors; exactly one of them is refreshed per iteration top.
	 */
	while (len != 0) {
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		if (sbaddr == NULL) {
			/* advance to the next source buflet */
			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
			if (__improbable(sbuf == NULL)) {
				break;
			}
			sbufp = sbuf;
			sblen = __buflet_get_data_length(sbuf);
			sboff = __buflet_get_data_offset(sbuf);
			ASSERT(soff >= sboff);
			ASSERT(sboff + sblen > soff);
			sblen -= (soff - sboff);
			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
			soff = 0;       /* offset applies to first buflet only */
		}

		if (dbaddr == NULL) {
			/* finalize the length of the exhausted dest buflet */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				break;
			}
			dbufp = dbuf;
			dlim = __buflet_get_data_limit(dbuf);
			ASSERT(dlim > doff);
			dlim -= doff;
			if (doff != 0) {
				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
			}
			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
			dlen0 = dlim;   /* capacity; used to set final length */
			doff = 0;       /* offset applies to first buflet only */
		}

		/* bytes to process this round: bounded by both buflets */
		clen = MIN(len, sblen);
		clen = MIN(clen, dlim);

		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				clen -= 1;
				dlim -= 1;
				len -= 1;
			}
			needs_swap = started_on_odd;

			/* sum an even byte count; odd tail handled below */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* reduce early so a later byte swap cannot overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}
		sbaddr += clen;
		dbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		dlim -= clen + odd;
		len -= clen + odd;

		/* NULL cursors signal which side needs the next buflet */
		if (sblen == 0) {
			sbaddr = NULL;
		}

		if (dlim == 0) {
			dbaddr = NULL;
		}
	}

	/* set the final (possibly partial) length of the last dest buflet */
	if (__probable(dbuf != NULL)) {
		__buflet_set_data_length(dbuf, (dlen0 - dlim));
	}
	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
		*csum_partial = (uint32_t)sum;
	}
	/* TRUE only if the entire requested length was copied */
	return len == 0;
}
535 
536 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)537 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
538 {
539 	uint8_t odd = 0;
540 	uint32_t sum = 0, partial;
541 	boolean_t needs_swap, started_on_odd = FALSE;
542 	uint8_t *sbaddr = NULL;
543 	uint16_t sbcnt;
544 	uint32_t clen, sblen, sboff;
545 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
546 	kern_buflet_t sbuf = NULL, sbufp = NULL;
547 
548 	sbcnt = __packet_get_buflet_count(sph);
549 
550 	/* fastpath (single buflet, even aligned, even length) */
551 	if (sbcnt == 1 && len != 0) {
552 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
553 		ASSERT(sbuf != NULL);
554 		sblen = __buflet_get_data_length(sbuf);
555 		sboff = __buflet_get_data_offset(sbuf);
556 		ASSERT(soff >= sboff);
557 		ASSERT(sboff + sblen > soff);
558 		sblen -= (soff - sboff);
559 		sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
560 
561 		clen = MIN(len, sblen);
562 
563 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
564 			sum = __packet_cksum(sbaddr, clen, 0);
565 			return __packet_fold_sum(sum);
566 		}
567 
568 		sbaddr = NULL;
569 		sbuf = sbufp = NULL;
570 	}
571 
572 	/* slowpath */
573 	while (len != 0) {
574 		ASSERT(sbaddr == NULL);
575 		if (sbaddr == NULL) {
576 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
577 			if (__improbable(sbuf == NULL)) {
578 				break;
579 			}
580 			sbufp = sbuf;
581 			sblen = __buflet_get_data_length(sbuf);
582 			sboff = __buflet_get_data_offset(sbuf);
583 			ASSERT(soff >= sboff);
584 			ASSERT(sboff + sblen > soff);
585 			sblen -= (soff - sboff);
586 			sbaddr = (uint8_t *)__buflet_get_data_address(sbuf) + soff;
587 			soff = 0;
588 		}
589 
590 		clen = MIN(len, sblen);
591 
592 		partial = 0;
593 		if (__improbable((uintptr_t)sbaddr & 1)) {
594 			/* Align on word boundary */
595 			started_on_odd = !started_on_odd;
596 #if BYTE_ORDER == LITTLE_ENDIAN
597 			partial = (uint8_t)*sbaddr << 8;
598 #else /* BYTE_ORDER != LITTLE_ENDIAN */
599 			partial = (uint8_t)*sbaddr;
600 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
601 			clen -= 1;
602 			len -= 1;
603 		}
604 		needs_swap = started_on_odd;
605 
606 		odd = clen & 1u;
607 		clen -= odd;
608 
609 		if (clen != 0) {
610 			partial = __packet_cksum(sbaddr,
611 			    clen, partial);
612 		}
613 
614 		if (__improbable(partial & 0xc0000000)) {
615 			if (needs_swap) {
616 				partial = (partial << 8) +
617 				    (partial >> 24);
618 			}
619 			sum += (partial >> 16);
620 			sum += (partial & 0xffff);
621 			partial = 0;
622 		}
623 		sbaddr += clen;
624 
625 		if (odd != 0) {
626 #if BYTE_ORDER == LITTLE_ENDIAN
627 			partial += (uint8_t)*sbaddr;
628 #else /* BYTE_ORDER != LITTLE_ENDIAN */
629 			partial += (uint8_t)*sbaddr << 8;
630 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
631 			started_on_odd = !started_on_odd;
632 		}
633 
634 		if (needs_swap) {
635 			partial = (partial << 8) + (partial >> 24);
636 		}
637 		sum += (partial >> 16) + (partial & 0xffff);
638 		/*
639 		 * Reduce sum to allow potential byte swap
640 		 * in the next iteration without carry.
641 		 */
642 		sum = (sum >> 16) + (sum & 0xffff);
643 
644 		sblen -= clen + odd;
645 		len -= clen + odd;
646 
647 		if (sblen == 0) {
648 			sbaddr = NULL;
649 		}
650 	}
651 
652 	/* Final fold (reduce 32-bit to 16-bit) */
653 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
654 	sum = (sum >> 16) + (sum & 0xffff);
655 	return (uint32_t)sum;
656 }
657 
658 
659 /*
660  * This is a multi-buflet variant of pkt_copy_from_pkt().
661  *
662  * start/stuff is relative to soff, within [0, len], such that
663  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
664  */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	/*
	 * Only checksum while copying if the caller requested it AND the
	 * source packet does not already carry full checksum flags.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	/* destination copy must fit across all of its buflets */
	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			/* RX keeps a partial (not finalized) sum */
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/* account for the pre-checksum prefix bytes */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			/* plain copy; propagate source RX checksum state */
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}
		break;

	case NR_TX:
		if (copysum) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			/* TX finalizes (one's-complements) the sum */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			/* account for the pre-checksum prefix bytes */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* carry over only TSO/TX-offload checksum flags */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
		    (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

#if COPY_LOG
		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
		    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
		    len, (copysum ? (len - start) : 0), csum, start,
		    dpkt->pkt_csum_flags);
#endif
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
779 
780 static inline uint32_t
_convert_mbuf_csum_flags(uint32_t mbuf_flags)781 _convert_mbuf_csum_flags(uint32_t mbuf_flags)
782 {
783 	uint32_t pkt_flags = 0;
784 
785 	if (mbuf_flags & CSUM_TCP) {
786 		pkt_flags |= PACKET_CSUM_TCP;
787 	}
788 	if (mbuf_flags & CSUM_TCPIPV6) {
789 		pkt_flags |= PACKET_CSUM_TCPIPV6;
790 	}
791 	if (mbuf_flags & CSUM_UDP) {
792 		pkt_flags |= PACKET_CSUM_UDP;
793 	}
794 	if (mbuf_flags & CSUM_UDPIPV6) {
795 		pkt_flags |= PACKET_CSUM_UDPIPV6;
796 	}
797 	if (mbuf_flags & CSUM_IP) {
798 		pkt_flags |= PACKET_CSUM_IP;
799 	}
800 	if (mbuf_flags & CSUM_ZERO_INVERT) {
801 		pkt_flags |= PACKET_CSUM_ZERO_INVERT;
802 	}
803 
804 	return pkt_flags;
805 }
806 
807 /*
808  * This routine is used for copying an mbuf which originated in the host
809  * stack destined to a native skywalk interface (NR_TX), as well as for
810  * mbufs originating on compat network interfaces (NR_RX).
811  *
812  * start/stuff is relative to moff, within [0, len], such that
813  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
814  */
815 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)816 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
817     struct mbuf *m, const uint16_t moff, const uint32_t len,
818     const boolean_t copysum, const uint16_t start)
819 {
820 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
821 	struct m_tag *ts_tag = NULL;
822 	uint32_t partial;
823 	uint16_t csum = 0;
824 	uint16_t vlan = 0;
825 	uint8_t *baddr;
826 
827 	static_assert(sizeof(csum) == sizeof(uint16_t));
828 
829 	/* get buffer address from packet */
830 	MD_BUFLET_ADDR_ABS(pkt, baddr);
831 	ASSERT(baddr != NULL);
832 	baddr += poff;
833 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
834 
835 	switch (t) {
836 	case NR_RX:
837 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
838 		pkt->pkt_csum_rx_start_off = 0;
839 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
840 		pkt->pkt_svc_class = m_get_service_class(m);
841 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
842 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
843 			/*
844 			 * Use m_copydata() to copy the portion up to the
845 			 * point where we need to start the checksum, and
846 			 * copy the remainder, checksumming as we go.
847 			 */
848 			if (start != 0) {
849 				m_copydata(m, moff, start, baddr);
850 			}
851 			partial = m_copydata_sum(m, start, (len - start),
852 			    (baddr + start), 0, NULL);
853 			csum = __packet_fold_sum(partial);
854 
855 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
856 			    start, csum, FALSE);
857 		} else {
858 			m_copydata(m, moff, len, baddr);
859 		}
860 
861 		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
862 			__packet_set_vlan_tag(ph, vlan);
863 		}
864 
865 #if COPY_LOG
866 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
867 		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
868 		    len, (copysum ? (len - start) : 0), csum, start);
869 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
870 		    "   mbuf %p csumf/rxstart/rxval 0x%x/%u/0x%04x",
871 		    SK_KVA(m), m->m_pkthdr.csum_flags,
872 		    (uint32_t)m->m_pkthdr.csum_rx_start,
873 		    (uint32_t)m->m_pkthdr.csum_rx_val);
874 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
875 		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
876 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
877 		    (uint32_t)pkt->pkt_csum_rx_start_off,
878 		    (uint32_t)pkt->pkt_csum_rx_value);
879 #endif
880 		break;
881 
882 	case NR_TX:
883 		if (copysum) {
884 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
885 			/*
886 			 * Use m_copydata() to copy the portion up to the
887 			 * point where we need to start the checksum, and
888 			 * copy the remainder, checksumming as we go.
889 			 */
890 			if (start != 0) {
891 				m_copydata(m, moff, start, baddr);
892 			}
893 			partial = m_copydata_sum(m, start, (len - start),
894 			    (baddr + start), 0, NULL);
895 			csum = __packet_fold_sum_final(partial);
896 
897 			/*
898 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
899 			 * ideally we'd only test for CSUM_ZERO_INVERT
900 			 * here, but catch cases where the originator
901 			 * did not set it for UDP.
902 			 */
903 			if (csum == 0 && (m->m_pkthdr.csum_flags &
904 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
905 				csum = 0xffff;
906 			}
907 
908 			/* Insert checksum into packet */
909 			ASSERT(stuff <= (len - sizeof(csum)));
910 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
911 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
912 			} else {
913 				bcopy((void *)&csum, baddr + stuff,
914 				    sizeof(csum));
915 			}
916 		} else {
917 			m_copydata(m, moff, len, baddr);
918 		}
919 		pkt->pkt_csum_flags = 0;
920 		pkt->pkt_csum_tx_start_off = 0;
921 		pkt->pkt_csum_tx_stuff_off = 0;
922 
923 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
924 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
925 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
926 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
927 		}
928 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
929 			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
930 			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
931 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
932 		}
933 		if (!copysum) {
934 			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
935 		}
936 
937 		/* translate mbuf metadata */
938 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
939 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
940 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
941 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
942 		switch (m->m_pkthdr.pkt_proto) {
943 		case IPPROTO_QUIC:
944 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
945 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
946 			break;
947 
948 		default:
949 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
950 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
951 			break;
952 		}
953 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
954 		pkt->pkt_svc_class = m_get_service_class(m);
955 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
956 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
957 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
958 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
959 		}
960 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_LPW) != 0) {
961 			pkt->pkt_pflags |= __PKT_F_LPW;
962 		}
963 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
964 			pkt->pkt_pflags |= PKT_F_L4S;
965 		}
966 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
967 		pkt->pkt_policy_id =
968 		    (uint32_t)necp_get_policy_id_from_packet(m);
969 		pkt->pkt_skip_policy_id =
970 		    (uint32_t)necp_get_skip_policy_id_from_packet(m);
971 
972 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
973 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
974 				__packet_set_tx_completion_data(ph,
975 				    m->m_pkthdr.drv_tx_compl_arg,
976 				    m->m_pkthdr.drv_tx_compl_data);
977 			}
978 			pkt->pkt_tx_compl_context =
979 			    m->m_pkthdr.pkt_compl_context;
980 			pkt->pkt_tx_compl_callbacks =
981 			    m->m_pkthdr.pkt_compl_callbacks;
982 			/*
983 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
984 			 * mbuf can no longer trigger a completion callback.
985 			 * callback will be invoked when the kernel packet is
986 			 * completed.
987 			 */
988 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
989 
990 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
991 		}
992 
993 		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
994 		if (ts_tag != NULL) {
995 			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
996 		}
997 
998 		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
999 			__packet_set_vlan_tag(ph, vlan);
1000 		}
1001 
1002 #if COPY_LOG
1003 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1004 		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1005 		    len, (copysum ? (len - start) : 0), csum, start);
1006 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1007 		    "   mbuf %p csumf/txstart/txstuff 0x%x/%u/%u",
1008 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1009 		    (uint32_t)m->m_pkthdr.csum_tx_start,
1010 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
1011 #endif
1012 		break;
1013 
1014 	default:
1015 		VERIFY(0);
1016 		/* NOTREACHED */
1017 		__builtin_unreachable();
1018 	}
1019 	METADATA_ADJUST_LEN(pkt, len, poff);
1020 
1021 	if (m->m_flags & M_BCAST) {
1022 		__packet_set_link_broadcast(ph);
1023 	} else if (m->m_flags & M_MCAST) {
1024 		__packet_set_link_multicast(ph);
1025 	}
1026 
1027 #if COPY_LOG
1028 	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1029 	    (t == NR_RX) ? "RX" : "TX", sk_dump("buf", baddr, len, 128));
1030 #endif
1031 }
1032 
1033 /*
1034  * Like m_copydata_sum(), but works on a destination kernel packet.
1035  */
1036 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)1037 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1038     uint32_t len, boolean_t do_cscum)
1039 {
1040 	boolean_t needs_swap, started_on_odd = FALSE;
1041 	int off0 = soff;
1042 	uint32_t len0 = len;
1043 	struct mbuf *m0 = m;
1044 	uint32_t sum = 0, partial;
1045 	unsigned count0, count, odd, mlen_copied;
1046 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
1047 	uint16_t dbcnt = __packet_get_buflet_count(dph);
1048 	uint32_t dlim, dlen0;
1049 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1050 	kern_buflet_t dbuf = NULL, dbufp = NULL;
1051 
1052 	while (soff > 0) {
1053 		if (__improbable(m == NULL)) {
1054 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1055 			    __func__, m0, off0, len0);
1056 			/* NOTREACHED */
1057 			__builtin_unreachable();
1058 		}
1059 		if (soff < m->m_len) {
1060 			break;
1061 		}
1062 		soff -= m->m_len;
1063 		m = m->m_next;
1064 	}
1065 
1066 	if (__improbable(m == NULL)) {
1067 		panic("%s: invalid mbuf chain %p [off %d, len %d]",
1068 		    __func__, m0, off0, len0);
1069 		/* NOTREACHED */
1070 		__builtin_unreachable();
1071 	}
1072 
1073 	sbaddr = mtod(m, uint8_t *) + soff;
1074 	count = m->m_len - soff;
1075 	mlen_copied = 0;
1076 
1077 	while (len != 0) {
1078 		ASSERT(sbaddr == NULL || dbaddr == NULL);
1079 		if (sbaddr == NULL) {
1080 			soff = 0;
1081 			m = m->m_next;
1082 			if (__improbable(m == NULL)) {
1083 				panic("%s: invalid mbuf chain %p [off %d, "
1084 				    "len %d]", __func__, m0, off0, len0);
1085 				/* NOTREACHED */
1086 				__builtin_unreachable();
1087 			}
1088 			sbaddr = mtod(m, uint8_t *);
1089 			count = m->m_len;
1090 			mlen_copied = 0;
1091 		}
1092 
1093 		if (__improbable(count == 0)) {
1094 			sbaddr = NULL;
1095 			continue;
1096 		}
1097 
1098 		if (dbaddr == NULL) {
1099 			if (dbufp != NULL) {
1100 				__buflet_set_data_length(dbufp, dlen0);
1101 			}
1102 
1103 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1104 			if (__improbable(dbuf == NULL)) {
1105 				panic("%s: mbuf too large %p [off %d, "
1106 				    "len %d]", __func__, m0, off0, len0);
1107 				/* NOTREACHED */
1108 				__builtin_unreachable();
1109 			}
1110 			dbufp = dbuf;
1111 			dlim = __buflet_get_data_limit(dbuf) - doff;
1112 			dbaddr = (uint8_t *)__buflet_get_data_address(dbuf) + doff;
1113 			dlen0 = dlim;
1114 			doff = 0;
1115 		}
1116 
1117 		count = MIN(count, (unsigned)len);
1118 		count0 = count = MIN(count, dlim);
1119 
1120 		if (!do_cscum) {
1121 			_pkt_copy(sbaddr, dbaddr, count);
1122 			sbaddr += count;
1123 			dbaddr += count;
1124 			goto skip_csum;
1125 		}
1126 
1127 		partial = 0;
1128 		if ((uintptr_t)sbaddr & 1) {
1129 			/* Align on word boundary */
1130 			started_on_odd = !started_on_odd;
1131 #if BYTE_ORDER == LITTLE_ENDIAN
1132 			partial = *sbaddr << 8;
1133 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1134 			partial = *sbaddr;
1135 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1136 			*dbaddr++ = *sbaddr++;
1137 			count -= 1;
1138 		}
1139 
1140 		needs_swap = started_on_odd;
1141 		odd = count & 1u;
1142 		count -= odd;
1143 
1144 		if (count) {
1145 			partial = __packet_copy_and_sum(sbaddr,
1146 			    dbaddr, count, partial);
1147 			sbaddr += count;
1148 			dbaddr += count;
1149 			if (__improbable(partial & 0xc0000000)) {
1150 				if (needs_swap) {
1151 					partial = (partial << 8) +
1152 					    (partial >> 24);
1153 				}
1154 				sum += (partial >> 16);
1155 				sum += (partial & 0xffff);
1156 				partial = 0;
1157 			}
1158 		}
1159 
1160 		if (odd) {
1161 #if BYTE_ORDER == LITTLE_ENDIAN
1162 			partial += *sbaddr;
1163 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1164 			partial += *sbaddr << 8;
1165 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1166 			*dbaddr++ = *sbaddr++;
1167 			started_on_odd = !started_on_odd;
1168 		}
1169 
1170 		if (needs_swap) {
1171 			partial = (partial << 8) + (partial >> 24);
1172 		}
1173 		sum += (partial >> 16) + (partial & 0xffff);
1174 		/*
1175 		 * Reduce sum to allow potential byte swap
1176 		 * in the next iteration without carry.
1177 		 */
1178 		sum = (sum >> 16) + (sum & 0xffff);
1179 
1180 skip_csum:
1181 		dlim -= count0;
1182 		len -= count0;
1183 		mlen_copied += count0;
1184 
1185 		if (dlim == 0) {
1186 			dbaddr = NULL;
1187 		}
1188 
1189 		count = m->m_len - soff - mlen_copied;
1190 		if (count == 0) {
1191 			sbaddr = NULL;
1192 		}
1193 	}
1194 
1195 	ASSERT(len == 0);
1196 	ASSERT(dbuf != NULL);
1197 	__buflet_set_data_length(dbuf, (dlen0 - dlim));
1198 
1199 	if (!do_cscum) {
1200 		return 0;
1201 	}
1202 
1203 	/* Final fold (reduce 32-bit to 16-bit) */
1204 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1205 	sum = (sum >> 16) + (sum & 0xffff);
1206 	return sum;
1207 }
1208 
1209 /*
1210  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1211  *
1212  * start/stuff is relative to moff, within [0, len], such that
1213  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1214  */
/*
 * t:       NR_RX (inbound) or NR_TX (outbound) — selects which metadata
 *          translation path is taken below.
 * ph:      destination kernel packet handle (may span multiple buflets).
 * poff:    destination byte offset within the packet buffer.
 * m:       source mbuf chain.
 * moff:    source byte offset within the mbuf chain.
 * len:     number of bytes to copy.
 * copysum: if TRUE, compute a partial Internet checksum while copying.
 * start:   offset (relative to moff/poff) where checksumming begins.
 */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct m_tag *ts_tag = NULL;
	uint32_t partial;
	uint16_t csum = 0;
	uint16_t vlan = 0;
	uint8_t *baddr;

	static_assert(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* total span must fit in the aggregate buflet capacity */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		/* carry the mbuf's RX checksum state over to the packet */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/*
		 * Only checksum here if the mbuf does not already carry a
		 * full hardware-verified checksum and the caller asked us to.
		 */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * m_copypkt_sum() accounted for (len - start) bytes;
			 * account for the first 'start' bytes here.
			 */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			/* plain copy, no software checksum */
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}

		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
			__packet_set_vlan_tag(ph, vlan);
		}

#if COPY_LOG
		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    len, (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf %p csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
#endif
		break;

	case NR_TX:
		if (copysum) {
			/* where the computed checksum gets written back */
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte copy */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			/* account for the first 'start' bytes copied above */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum was handled (or deferred) above; clear offsets */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
			/* TSO flags are mutually exclusive per family */
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
			pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}
		if (!copysum) {
			/* pass the mbuf's offload request through unchanged */
			pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(m->m_pkthdr.csum_flags);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_LPW) != 0) {
			pkt->pkt_pflags |= __PKT_F_LPW;
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		/* carry over NECP policy attribution */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);
		pkt->pkt_skip_policy_id =
		    (uint32_t)necp_get_skip_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		/* propagate any AQM transmit timestamp tag */
		ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
		if (ts_tag != NULL) {
			__packet_set_tx_timestamp(ph, *(uint64_t *)(ts_tag->m_tag_data));
		}

		if (mbuf_get_vlan_tag(m, &vlan) == 0) {
			__packet_set_vlan_tag(ph, vlan);
		}

#if COPY_LOG
		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    len, (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf %p csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
#endif
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* mirror the mbuf's link-layer cast flags onto the packet */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

#if COPY_LOG
	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
	    (t == NR_RX) ? "RX" : "TX", sk_dump("buf", baddr, len, 128));
#endif
}
1433 
1434 static inline uint32_t
_convert_pkt_csum_flags(uint32_t pkt_flags)1435 _convert_pkt_csum_flags(uint32_t pkt_flags)
1436 {
1437 	uint32_t mbuf_flags = 0;
1438 	if (pkt_flags & PACKET_CSUM_TCP) {
1439 		mbuf_flags |= CSUM_TCP;
1440 	}
1441 	if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1442 		mbuf_flags |= CSUM_TCPIPV6;
1443 	}
1444 	if (pkt_flags & PACKET_CSUM_UDP) {
1445 		mbuf_flags |= CSUM_UDP;
1446 	}
1447 	if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1448 		mbuf_flags |= CSUM_UDPIPV6;
1449 	}
1450 	if (pkt_flags & PACKET_CSUM_IP) {
1451 		mbuf_flags |= CSUM_IP;
1452 	}
1453 	if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1454 		mbuf_flags |= CSUM_ZERO_INVERT;
1455 	}
1456 	if (pkt_flags & PACKET_CSUM_TSO_IPV4) {
1457 		mbuf_flags |= CSUM_TSO_IPV4;
1458 	}
1459 	if (pkt_flags & PACKET_CSUM_TSO_IPV6) {
1460 		mbuf_flags |= CSUM_TSO_IPV6;
1461 	}
1462 
1463 	return mbuf_flags;
1464 }
1465 
1466 /*
1467  * This routine is used for copying from a packet originating from a native
1468  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1469  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1470  *
1471  * We do adjust the length to reflect the total data span.
1472  *
1473  * This routine supports copying into an mbuf chain for RX but not TX.
1474  *
1475  * start/stuff is relative to poff, within [0, len], such that
1476  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1477  */
/*
 * t:       NR_RX (packet -> mbuf chain, toward the host stack) or
 *          NR_TX (packet -> single mbuf, toward a compat interface).
 * ph:      source kernel packet handle.
 * poff:    source byte offset within the packet buffer.
 * m:       destination mbuf (chain head for RX; single mbuf for TX).
 * moff:    destination byte offset; applied to m->m_data below.
 * len:     number of bytes to copy.
 * copysum: if TRUE, checksum while copying (unless the packet already
 *          carries full checksum flags — see do_sum).
 * start:   offset (relative to poff/moff) where checksumming begins.
 */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;
	uint32_t partial = 0;
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;
	uint16_t vlan = 0;
	uint8_t *baddr;
	uint8_t *dp;
	/* skip software checksum if the packet already has a full one */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	static_assert(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet variant: span must fit in one buffer */
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	ASSERT((m->m_flags & M_PKTHDR));
	/* position the destination at the caller-requested offset */
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/*
		 * Use pkt_copy() to copy the portion up to the
		 * point where we need to start the checksum, and
		 * copy the remainder, checksumming as we go.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m_mtod_current(m);
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/*
		 * Copy (and optionally checksum) the remainder into the
		 * mbuf chain, filling each mbuf's trailing space in turn.
		 */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			dp = (uint8_t *)m_mtod_end(curr_m);
			if (__probable(do_sum)) {
				partial = __packet_copy_and_sum((baddr + copied_len),
				    dp, tmp_len, partial);
			} else {
				_pkt_copy((baddr + copied_len), dp, tmp_len);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;	/* pkthdr len lives on the head */
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		/* caller must have supplied enough mbuf space for len */
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* report the software-computed partial checksum */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* pass through the packet's existing RX checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			static_assert(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
		if (__improbable((pkt->pkt_link_flags & PKT_LINKF_BCAST) != 0)) {
			m->m_flags |= M_BCAST;
		}
		if (__improbable((pkt->pkt_link_flags & PKT_LINKF_MCAST) != 0)) {
			m->m_flags |= M_MCAST;
		}

		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
			mbuf_set_vlan_tag(m, vlan);
		}

#if COPY_LOG
		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    len, (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf %p moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
#endif
		break;

	case NR_TX:
		dp = (uint8_t *)m_mtod_current(m);
		/* TX path writes into a single mbuf only (no chain) */
		ASSERT(m->m_next == NULL);

		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

		if (copysum) {
			/* where the computed checksum gets written back */
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte copy */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;
		/* forward any remaining offload request to the mbuf layer */
		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}
		if ((pkt->pkt_pflags & __PKT_F_LPW) != 0) {
			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_LPW;
		}
		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
		}
		if ((pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
			/* carry the packet's TX timestamp as an AQM mbuf tag */
			struct m_tag *tag = NULL;
			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
			    sizeof(uint64_t), M_WAITOK, m);
			if (tag != NULL) {
				m_tag_prepend(m, tag);
				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
			}
		}
		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;

		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
			mbuf_set_vlan_tag(m, vlan);
		}

#if COPY_LOG
		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    len, (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  %p poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
#endif
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* mirror the packet's link-layer cast flags onto the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
#if COPY_LOG
	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128));
#endif
}
1706 
1707 /*
1708  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1709  * NOTE: poff is the offset within the packet.
1710  *
1711  * This routine supports copying into an mbuf chain for RX but not TX.
1712  *
1713  * start/stuff is relative to poff, within [0, len], such that
1714  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1715  */
1716 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1717 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1718     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1719     const uint32_t len, const boolean_t copysum, const uint16_t start)
1720 {
1721 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1722 	struct mbuf *curr_m;
1723 	uint32_t partial = 0;
1724 	uint32_t remaining_len = len, copied_len = 0;
1725 	uint16_t csum = 0;
1726 	uint16_t vlan = 0;
1727 	uint8_t *baddr;
1728 	uint8_t *dp;
1729 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1730 
1731 	ASSERT(len >= start);
1732 	static_assert(sizeof(csum) == sizeof(uint16_t));
1733 
1734 	/* get buffer address from packet */
1735 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1736 	ASSERT(baddr != NULL);
1737 	baddr += poff;
1738 
1739 	ASSERT((m->m_flags & M_PKTHDR));
1740 	m->m_data += moff;
1741 
1742 	switch (t) {
1743 	case NR_RX:
1744 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1745 		if (__probable(do_sum && start != 0)) {
1746 			ASSERT(M_TRAILINGSPACE(m) >= start);
1747 			ASSERT(m->m_len == 0);
1748 			dp = (uint8_t *)m_mtod_current(m);
1749 			_pkt_copy(baddr, dp, start);
1750 			remaining_len -= start;
1751 			copied_len += start;
1752 			m->m_len += start;
1753 			m->m_pkthdr.len += start;
1754 		}
1755 		curr_m = m;
1756 		while (curr_m != NULL && remaining_len != 0) {
1757 			uint32_t tmp_len = MIN(remaining_len,
1758 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1759 			uint16_t soff = poff + (uint16_t)copied_len;
1760 			dp = (uint8_t *)m_mtod_end(curr_m);
1761 
1762 			if (__probable(do_sum)) {
1763 				partial = _pkt_copyaddr_sum(ph, soff,
1764 				    dp, tmp_len, TRUE, partial, NULL);
1765 			} else {
1766 				pkt_copyaddr_sum(ph, soff,
1767 				    dp, tmp_len, FALSE, 0, NULL);
1768 			}
1769 
1770 			curr_m->m_len += tmp_len;
1771 			m->m_pkthdr.len += tmp_len;
1772 			copied_len += tmp_len;
1773 			remaining_len -= tmp_len;
1774 			curr_m = curr_m->m_next;
1775 		}
1776 		ASSERT(remaining_len == 0);
1777 
1778 		if (__probable(do_sum)) {
1779 			csum = __packet_fold_sum(partial);
1780 
1781 			m->m_pkthdr.csum_flags |=
1782 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1783 			m->m_pkthdr.csum_rx_start = start;
1784 			m->m_pkthdr.csum_rx_val = csum;
1785 		} else {
1786 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1787 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1788 			static_assert(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1789 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1790 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1791 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1792 			}
1793 		}
1794 
1795 		m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1796 		m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1797 
1798 		/* translate packet metadata */
1799 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1800 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1801 
1802 		m->m_pkthdr.rx_seg_cnt = pkt->pkt_seg_cnt;
1803 
1804 		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1805 			mbuf_set_vlan_tag(m, vlan);
1806 		}
1807 		if (__improbable((pkt->pkt_link_flags & PKT_LINKF_BCAST) != 0)) {
1808 			m->m_flags |= M_BCAST;
1809 		}
1810 		if (__improbable((pkt->pkt_link_flags & PKT_LINKF_MCAST) != 0)) {
1811 			m->m_flags |= M_MCAST;
1812 		}
1813 
1814 #if COPY_LOG
1815 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_RX, current_proc(),
1816 		    "RX len %u, copy+sum %u (csum 0x%04x), start %u",
1817 		    len, (copysum ? (len - start) : 0), csum, start);
1818 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1819 		    "   mbuf %p moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1820 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1821 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1822 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1823 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1824 		    "   pkt  %p poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1825 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1826 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1827 		    (uint32_t)pkt->pkt_csum_rx_value);
1828 #endif
1829 		break;
1830 	case NR_TX:
1831 		ASSERT(len <= M16KCLBYTES);
1832 		dp = (uint8_t *)m_mtod_current(m);
1833 		ASSERT(m->m_next == NULL);
1834 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1835 		    (uint32_t)mbuf_maxlen(m));
1836 		m->m_len += len;
1837 		m->m_pkthdr.len += len;
1838 		VERIFY(m->m_len == m->m_pkthdr.len &&
1839 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1840 		if (copysum) {
1841 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1842 			/*
1843 			 * Use pkt_copy() to copy the portion up to the
1844 			 * point where we need to start the checksum, and
1845 			 * copy the remainder, checksumming as we go.
1846 			 */
1847 			if (__probable(start != 0)) {
1848 				_pkt_copy(baddr, dp, start);
1849 			}
1850 			partial = _pkt_copyaddr_sum(ph, (poff + start),
1851 			    (dp + start), (len - start), TRUE, 0, NULL);
1852 			csum = __packet_fold_sum_final(partial);
1853 
1854 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1855 			if (csum == 0 &&
1856 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1857 				csum = 0xffff;
1858 			}
1859 
1860 			/* Insert checksum into packet */
1861 			ASSERT(stuff <= (len - sizeof(csum)));
1862 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1863 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1864 			} else {
1865 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1866 			}
1867 		} else {
1868 			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1869 		}
1870 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1871 		m->m_pkthdr.csum_tx_start = 0;
1872 		m->m_pkthdr.csum_tx_stuff = 0;
1873 		m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt->pkt_csum_flags);
1874 
1875 		/* translate packet metadata */
1876 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1877 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1878 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1879 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1880 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1881 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1882 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1883 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1884 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1885 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1886 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1887 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1888 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1889 		}
1890 		if ((pkt->pkt_pflags & __PKT_F_LPW) != 0) {
1891 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_LPW;
1892 		}
1893 		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1894 			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1895 		}
1896 		if ((pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0) {
1897 			struct m_tag *tag = NULL;
1898 			tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1899 			    sizeof(uint64_t), M_WAITOK, m);
1900 			if (tag != NULL) {
1901 				m_tag_prepend(m, tag);
1902 				*(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1903 			}
1904 		}
1905 
1906 		if (__packet_get_vlan_tag(ph, &vlan) == 0) {
1907 			mbuf_set_vlan_tag(m, vlan);
1908 		}
1909 
1910 #if COPY_LOG
1911 		SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_TX, current_proc(),
1912 		    "TX len %u, copy+sum %u (csum 0x%04x), start %u",
1913 		    len, (copysum ? (len - start) : 0), csum, start);
1914 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1915 		    "   pkt  %p poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1916 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1917 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1918 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1919 #endif
1920 		break;
1921 
1922 	default:
1923 		VERIFY(0);
1924 		/* NOTREACHED */
1925 		__builtin_unreachable();
1926 	}
1927 
1928 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1929 		m->m_flags |= M_BCAST;
1930 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1931 		m->m_flags |= M_MCAST;
1932 	}
1933 #if COPY_LOG
1934 	SK_PDF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, current_proc(), "%s %s",
1935 	    (t == NR_RX) ? "RX" : "TX",
1936 	    sk_dump("buf", (uint8_t *)dp, m->m_len, 128));
1937 #endif
1938 }
1939 
1940 /*
1941  * Like m_copydata(), but computes 16-bit sum as the data is copied.
1942  * Caller can provide an initial sum to be folded into the computed
1943  * sum.  The accumulated partial sum (32-bit) is returned to caller;
1944  * caller is responsible for further reducing it to 16-bit if needed,
1945  * as well as to perform the final 1's complement on it.
1946  */
1947 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * __sized_by (len)vp,uint32_t initial_sum,boolean_t * odd_start)1948 m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len)vp, uint32_t initial_sum,
1949     boolean_t *odd_start)
1950 {
1951 	boolean_t needs_swap, started_on_odd = FALSE;
1952 	int off0 = off, len0 = len;
1953 	struct mbuf *m0 = m;
1954 	uint64_t sum, partial;
1955 	unsigned count, odd;
1956 	char *cp = vp;
1957 
1958 	if (__improbable(off < 0 || len < 0)) {
1959 		panic("%s: invalid offset %d or len %d", __func__, off, len);
1960 		/* NOTREACHED */
1961 		__builtin_unreachable();
1962 	}
1963 
1964 	while (off > 0) {
1965 		if (__improbable(m == NULL)) {
1966 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1967 			    __func__, m0, off0, len0);
1968 			/* NOTREACHED */
1969 			__builtin_unreachable();
1970 		}
1971 		if (off < m->m_len) {
1972 			break;
1973 		}
1974 		off -= m->m_len;
1975 		m = m->m_next;
1976 	}
1977 
1978 	if (odd_start) {
1979 		started_on_odd = *odd_start;
1980 	}
1981 	sum = initial_sum;
1982 
1983 	for (; len0 > 0; m = m->m_next) {
1984 		uint8_t *datap;
1985 
1986 		if (__improbable(m == NULL)) {
1987 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1988 			    __func__, m0, off0, len);
1989 			/* NOTREACHED */
1990 			__builtin_unreachable();
1991 		}
1992 
1993 		datap = mtod(m, uint8_t *) + off;
1994 		count = m->m_len;
1995 
1996 		if (__improbable(count == 0)) {
1997 			continue;
1998 		}
1999 
2000 		count = MIN(count - off, (unsigned)len0);
2001 		partial = 0;
2002 
2003 		if ((uintptr_t)datap & 1) {
2004 			/* Align on word boundary */
2005 			started_on_odd = !started_on_odd;
2006 #if BYTE_ORDER == LITTLE_ENDIAN
2007 			partial = *datap << 8;
2008 #else /* BYTE_ORDER != LITTLE_ENDIAN */
2009 			partial = *datap;
2010 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
2011 			*cp++ = *datap++;
2012 			count -= 1;
2013 			len0 -= 1;
2014 		}
2015 
2016 		needs_swap = started_on_odd;
2017 		odd = count & 1u;
2018 		count -= odd;
2019 
2020 		if (count) {
2021 			partial = __packet_copy_and_sum(datap,
2022 			    cp, count, (uint32_t)partial);
2023 			datap += count;
2024 			cp += count;
2025 			len0 -= count;
2026 			if (__improbable((partial & (3ULL << 62)) != 0)) {
2027 				if (needs_swap) {
2028 					partial = (partial << 8) +
2029 					    (partial >> 56);
2030 				}
2031 				sum += (partial >> 32);
2032 				sum += (partial & 0xffffffff);
2033 				partial = 0;
2034 			}
2035 		}
2036 
2037 		if (odd) {
2038 #if BYTE_ORDER == LITTLE_ENDIAN
2039 			partial += *datap;
2040 #else /* BYTE_ORDER != LITTLE_ENDIAN */
2041 			partial += *datap << 8;
2042 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
2043 			*cp++ = *datap++;
2044 			len0 -= 1;
2045 			started_on_odd = !started_on_odd;
2046 		}
2047 		off = 0;
2048 
2049 		if (needs_swap) {
2050 			partial = (partial << 8) + (partial >> 24);
2051 		}
2052 		sum += (partial >> 32) + (partial & 0xffffffff);
2053 		/*
2054 		 * Reduce sum to allow potential byte swap
2055 		 * in the next iteration without carry.
2056 		 */
2057 		sum = (sum >> 32) + (sum & 0xffffffff);
2058 	}
2059 
2060 	if (odd_start) {
2061 		*odd_start = started_on_odd;
2062 	}
2063 
2064 	/* Final fold (reduce 64-bit to 32-bit) */
2065 	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
2066 	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */
2067 
2068 	/* return 32-bit partial sum to caller */
2069 	return (uint32_t)sum;
2070 }
2071 
#if DEBUG || DEVELOPMENT
/*
 * Test-only support for appending random trailing bytes to packets
 * (controlled by the kern.skywalk.packet.trailers sysctl); exercises
 * the partial-checksum paths.  tb[] is refreshed roughly once every
 * TRAILERS_REGEN packets, shared by both trailer routines below.
 */
#define TRAILERS_MAX    16              /* max trailing bytes */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
2077 
2078 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)2079 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
2080 {
2081 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
2082 	uint32_t extra;
2083 	uint8_t *baddr;
2084 
2085 	/* get buffer address from packet */
2086 	MD_BUFLET_ADDR_ABS(pkt, baddr);
2087 	ASSERT(baddr != NULL);
2088 	ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
2089 
2090 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2091 	if (extra == 0 || extra > sizeof(tb) ||
2092 	    (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
2093 		return 0;
2094 	}
2095 
2096 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2097 	if (regen++ == TRAILERS_REGEN) {
2098 		read_frandom(&tb[0], sizeof(tb));
2099 		regen = 0;
2100 	}
2101 
2102 	bcopy(&tb[0], (baddr + len), extra);
2103 
2104 	/* recompute partial sum (also to exercise related logic) */
2105 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
2106 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
2107 	    ((len + extra) - start), 0);
2108 	pkt->pkt_csum_rx_start_off = start;
2109 
2110 	return extra;
2111 }
2112 
2113 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)2114 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
2115 {
2116 	uint32_t extra;
2117 
2118 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2119 	if (extra == 0 || extra > sizeof(tb)) {
2120 		return 0;
2121 	}
2122 
2123 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2124 		return 0;
2125 	}
2126 
2127 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2128 	if (regen++ == TRAILERS_REGEN) {
2129 		read_frandom(&tb[0], sizeof(tb));
2130 		regen = 0;
2131 	}
2132 
2133 	/* recompute partial sum (also to exercise related logic) */
2134 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2135 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2136 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2137 	m->m_pkthdr.csum_rx_start = start;
2138 
2139 	return extra;
2140 }
2141 #endif /* DEBUG || DEVELOPMENT */
2142 
2143 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)2144 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
2145     uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
2146 {
2147 	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
2148 }
2149 
2150 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * __sized_by (len)dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)2151 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len)dbaddr,
2152     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
2153 {
2154 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
2155 }
2156 
2157 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)2158 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2159     uint16_t len, boolean_t do_cscum)
2160 {
2161 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2162 }
2163 
2164 void
pkt_copy(void * __sized_by (len)src,void * __sized_by (len)dst,size_t len)2165 pkt_copy(void *__sized_by(len)src, void *__sized_by(len)dst, size_t len)
2166 {
2167 	return _pkt_copy(src, dst, len);
2168 }
2169