xref: /xnu-8792.81.2/bsd/skywalk/packet/packet_copy.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2017-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
#if (DEVELOPMENT || DEBUG)
/*
 * Debug-only sysctl subtree: kern.skywalk.packet.*
 */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
/* kern.skywalk.packet.trailers: tunable used for testing trailing bytes */
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
40 
41 
__attribute__((always_inline))
static inline void
_pkt_copy(void *src, void *dst, size_t len)
{
	/*
	 * Copy "len" bytes from "src" to "dst" (regions must not overlap).
	 * When both pointers are 8-byte aligned, dispatch to the specialized
	 * sk_copy64_* routines for common header sizes and power-of-two
	 * multiples; otherwise fall back to plain bcopy().
	 */
	if (__improbable(!IS_P2ALIGNED(src, 8) || !IS_P2ALIGNED(dst, 8))) {
		bcopy(src, dst, len);
		return;
	}

	switch (len) {
	case 20:        /* standard IPv4 header */
		sk_copy64_20(src, dst);
		return;

	case 40:        /* IPv6 header */
		sk_copy64_40(src, dst);
		return;

	default:
		break;
	}

	/* widest multiple first: 64-, 32-, 8-, then 4-byte chunked copies */
	if (IS_P2ALIGNED(len, 64)) {
		sk_copy64_64x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 32)) {
		sk_copy64_32x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 8)) {
		sk_copy64_8x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 4)) {
		sk_copy64_4x(src, dst, len);
	} else {
		bcopy(src, dst, len);
	}
}
75 
76 /*
77  * This routine is used for copying data across two kernel packets.
78  * Can also optionally compute 16-bit partial inet checksum as the
79  * data is copied.
80  * This routine is used by flowswitch while copying packet from vp
81  * adapter pool to packet in native netif pool and vice-a-versa.
82  *
83  * start/stuff is relative to soff, within [0, len], such that
84  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85  */
86 void
pkt_copy_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)87 pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
88     kern_packet_t sph, const uint16_t soff, const uint32_t len,
89     const boolean_t copysum, const uint16_t start, const uint16_t stuff,
90     const boolean_t invert)
91 {
92 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
93 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
94 	uint32_t partial;
95 	uint16_t csum = 0;
96 	uint8_t *sbaddr, *dbaddr;
97 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
98 
99 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
100 
101 	/* get buffer address from packet */
102 	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
103 	ASSERT(sbaddr != NULL);
104 	sbaddr += soff;
105 	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
106 	ASSERT(dbaddr != NULL);
107 	dbaddr += doff;
108 	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
109 
110 	switch (t) {
111 	case NR_RX:
112 		dpkt->pkt_csum_flags = 0;
113 		if (__probable(do_sum)) {
114 			/*
115 			 * Use pkt_copy() to copy the portion up to the
116 			 * point where we need to start the checksum, and
117 			 * copy the remainder, checksumming as we go.
118 			 */
119 			if (__probable(start != 0)) {
120 				_pkt_copy(sbaddr, dbaddr, start);
121 			}
122 			partial = __packet_copy_and_sum((sbaddr + start),
123 			    (dbaddr + start), (len - start), 0);
124 			csum = __packet_fold_sum(partial);
125 
126 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
127 			    start, csum, FALSE);
128 		} else {
129 			_pkt_copy(sbaddr, dbaddr, len);
130 			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
131 			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
132 			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
133 		}
134 
135 		SK_DF(SK_VERB_COPY | SK_VERB_RX,
136 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
137 		    sk_proc_name_address(current_proc()),
138 		    sk_proc_pid(current_proc()), len,
139 		    (copysum ? (len - start) : 0), csum, start);
140 		SK_DF(SK_VERB_COPY | SK_VERB_RX,
141 		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
142 		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
143 		    (uint32_t)dpkt->pkt_csum_rx_start_off,
144 		    (uint32_t)dpkt->pkt_csum_rx_value);
145 		break;
146 
147 	case NR_TX:
148 		if (__probable(copysum)) {
149 			/*
150 			 * Use pkt_copy() to copy the portion up to the
151 			 * point where we need to start the checksum, and
152 			 * copy the remainder, checksumming as we go.
153 			 */
154 			if (__probable(start != 0)) {
155 				_pkt_copy(sbaddr, dbaddr, start);
156 			}
157 			partial = __packet_copy_and_sum((sbaddr + start),
158 			    (dbaddr + start), (len - start), 0);
159 			csum = __packet_fold_sum_final(partial);
160 
161 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
162 			if (csum == 0 && invert) {
163 				csum = 0xffff;
164 			}
165 
166 			/* Insert checksum into packet */
167 			ASSERT(stuff <= (len - sizeof(csum)));
168 			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
169 				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
170 			} else {
171 				bcopy((void *)&csum, dbaddr + stuff,
172 				    sizeof(csum));
173 			}
174 		} else {
175 			_pkt_copy(sbaddr, dbaddr, len);
176 		}
177 		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
178 		dpkt->pkt_csum_tx_start_off = 0;
179 		dpkt->pkt_csum_tx_stuff_off = 0;
180 
181 		SK_DF(SK_VERB_COPY | SK_VERB_TX,
182 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
183 		    sk_proc_name_address(current_proc()),
184 		    sk_proc_pid(current_proc()), len,
185 		    (copysum ? (len - start) : 0), csum, start);
186 		break;
187 
188 	default:
189 		VERIFY(0);
190 		/* NOTREACHED */
191 		__builtin_unreachable();
192 	}
193 	METADATA_ADJUST_LEN(dpkt, len, doff);
194 
195 	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
196 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
197 	    (t == NR_RX) ? "RX" : "TX",
198 	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
199 }
200 
201 /*
202  * NOTE: soff is the offset within the packet
203  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
204  * caller is responsible for further reducing it to 16-bit if needed,
205  * as well as to perform the final 1's complement on it.
206  */
207 uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)208 _pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
209     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
210 {
211 	uint8_t odd = 0;
212 	uint8_t *sbaddr = NULL;
213 	uint32_t sum = initial_sum, partial;
214 	uint32_t len0 = len;
215 	boolean_t needs_swap, started_on_odd = FALSE;
216 	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
217 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
218 	kern_buflet_t sbuf = NULL, sbufp = NULL;
219 
220 	sbcnt = __packet_get_buflet_count(sph);
221 
222 	if (odd_start) {
223 		started_on_odd = *odd_start;
224 	}
225 
226 	/* fastpath (copy+sum, single buflet, even aligned, even length) */
227 	if (do_csum && sbcnt == 1 && len != 0) {
228 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
229 		ASSERT(sbuf != NULL);
230 		sboff = __buflet_get_data_offset(sbuf);
231 		sblen = __buflet_get_data_length(sbuf);
232 		ASSERT(sboff <= soff);
233 		ASSERT(soff < sboff + sblen);
234 		sblen -= (soff - sboff);
235 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
236 
237 		clen = (uint16_t)MIN(len, sblen);
238 
239 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
240 			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
241 			return __packet_fold_sum(sum);
242 		}
243 
244 		sbaddr = NULL;
245 		sbuf = sbufp = NULL;
246 	}
247 
248 	while (len != 0) {
249 		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
250 		if (__improbable(sbuf == NULL)) {
251 			panic("%s: bad packet, 0x%llx [off %d, len %d]",
252 			    __func__, SK_KVA(spkt), off0, len0);
253 			/* NOTREACHED */
254 			__builtin_unreachable();
255 		}
256 		sbufp = sbuf;
257 		sboff = __buflet_get_data_offset(sbuf);
258 		sblen = __buflet_get_data_length(sbuf);
259 		ASSERT((sboff <= soff) && (soff < sboff + sblen));
260 		sblen -= (soff - sboff);
261 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
262 		soff = 0;
263 		clen = (uint16_t)MIN(len, sblen);
264 		if (__probable(do_csum)) {
265 			partial = 0;
266 			if (__improbable((uintptr_t)sbaddr & 1)) {
267 				/* Align on word boundary */
268 				started_on_odd = !started_on_odd;
269 #if BYTE_ORDER == LITTLE_ENDIAN
270 				partial = (uint8_t)*sbaddr << 8;
271 #else /* BYTE_ORDER != LITTLE_ENDIAN */
272 				partial = (uint8_t)*sbaddr;
273 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
274 				*dbaddr++ = *sbaddr++;
275 				sblen -= 1;
276 				clen -= 1;
277 				len -= 1;
278 			}
279 			needs_swap = started_on_odd;
280 
281 			odd = clen & 1u;
282 			clen -= odd;
283 
284 			if (clen != 0) {
285 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
286 				    clen, partial);
287 			}
288 
289 			if (__improbable(partial & 0xc0000000)) {
290 				if (needs_swap) {
291 					partial = (partial << 8) +
292 					    (partial >> 24);
293 				}
294 				sum += (partial >> 16);
295 				sum += (partial & 0xffff);
296 				partial = 0;
297 			}
298 		} else {
299 			_pkt_copy(sbaddr, dbaddr, clen);
300 		}
301 
302 		dbaddr += clen;
303 		sbaddr += clen;
304 
305 		if (__probable(do_csum)) {
306 			if (odd != 0) {
307 #if BYTE_ORDER == LITTLE_ENDIAN
308 				partial += (uint8_t)*sbaddr;
309 #else /* BYTE_ORDER != LITTLE_ENDIAN */
310 				partial += (uint8_t)*sbaddr << 8;
311 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
312 				*dbaddr++ = *sbaddr++;
313 				started_on_odd = !started_on_odd;
314 			}
315 
316 			if (needs_swap) {
317 				partial = (partial << 8) + (partial >> 24);
318 			}
319 			sum += (partial >> 16) + (partial & 0xffff);
320 			/*
321 			 * Reduce sum to allow potential byte swap
322 			 * in the next iteration without carry.
323 			 */
324 			sum = (sum >> 16) + (sum & 0xffff);
325 		}
326 
327 		sblen -= clen + odd;
328 		len -= clen + odd;
329 		ASSERT(sblen == 0 || len == 0);
330 	}
331 
332 	if (odd_start) {
333 		*odd_start = started_on_odd;
334 	}
335 
336 	if (__probable(do_csum)) {
337 		/* Final fold (reduce 32-bit to 16-bit) */
338 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
339 		sum = (sum >> 16) + (sum & 0xffff);
340 	}
341 	return sum;
342 }
343 
344 /*
345  * NOTE: Caller of this function is responsible to adjust the length and offset
346  * of the first buflet of the destination packet if (doff != 0),
347  * i.e. additional data is being prependend to the packet.
348  * It should also finalize the packet.
349  * To simplify & optimize the routine, we have also assumed that soff & doff
350  * will lie within the first buffer, which is true for the current use cases
351  * where, doff is the offset of the checksum field in the TCP/IP header and
352  * soff is the L3 offset.
353  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
354  * caller is responsible for further reducing it to 16-bit if needed,
355  * as well as to perform the final 1's complement on it.
356  */
357 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)358 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
359     uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
360 {
361 	uint8_t odd = 0;
362 	uint32_t sum = 0, partial;
363 	boolean_t needs_swap, started_on_odd = FALSE;
364 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
365 	uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
366 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
367 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
368 	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
369 
370 	ASSERT(csum_partial != NULL || !do_csum);
371 	sbcnt = __packet_get_buflet_count(sph);
372 	dbcnt = __packet_get_buflet_count(dph);
373 
374 	while (len != 0) {
375 		ASSERT(sbaddr == NULL || dbaddr == NULL);
376 		if (sbaddr == NULL) {
377 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
378 			if (__improbable(sbuf == NULL)) {
379 				break;
380 			}
381 			sbufp = sbuf;
382 			sblen = __buflet_get_data_length(sbuf);
383 			sboff = __buflet_get_data_offset(sbuf);
384 			ASSERT(soff >= sboff);
385 			ASSERT(sboff + sblen > soff);
386 			sblen -= (soff - sboff);
387 			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
388 			soff = 0;
389 		}
390 
391 		if (dbaddr == NULL) {
392 			if (dbufp != NULL) {
393 				__buflet_set_data_length(dbufp, dlen0);
394 			}
395 
396 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
397 			if (__improbable(dbuf == NULL)) {
398 				break;
399 			}
400 			dbufp = dbuf;
401 			dlim = __buflet_get_data_limit(dbuf);
402 			ASSERT(dlim > doff);
403 			dlim -= doff;
404 			if (doff != 0) {
405 				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
406 			}
407 			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
408 			dlen0 = dlim;
409 			doff = 0;
410 		}
411 
412 		clen = (uint16_t)MIN(len, sblen);
413 		clen = MIN(clen, dlim);
414 
415 		if (__probable(do_csum)) {
416 			partial = 0;
417 			if (__improbable((uintptr_t)sbaddr & 1)) {
418 				/* Align on word boundary */
419 				started_on_odd = !started_on_odd;
420 #if BYTE_ORDER == LITTLE_ENDIAN
421 				partial = (uint8_t)*sbaddr << 8;
422 #else /* BYTE_ORDER != LITTLE_ENDIAN */
423 				partial = (uint8_t)*sbaddr;
424 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
425 				*dbaddr++ = *sbaddr++;
426 				clen -= 1;
427 				dlim -= 1;
428 				len -= 1;
429 			}
430 			needs_swap = started_on_odd;
431 
432 			odd = clen & 1u;
433 			clen -= odd;
434 
435 			if (clen != 0) {
436 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
437 				    clen, partial);
438 			}
439 
440 			if (__improbable(partial & 0xc0000000)) {
441 				if (needs_swap) {
442 					partial = (partial << 8) +
443 					    (partial >> 24);
444 				}
445 				sum += (partial >> 16);
446 				sum += (partial & 0xffff);
447 				partial = 0;
448 			}
449 		} else {
450 			_pkt_copy(sbaddr, dbaddr, clen);
451 		}
452 		sbaddr += clen;
453 		dbaddr += clen;
454 
455 		if (__probable(do_csum)) {
456 			if (odd != 0) {
457 #if BYTE_ORDER == LITTLE_ENDIAN
458 				partial += (uint8_t)*sbaddr;
459 #else /* BYTE_ORDER != LITTLE_ENDIAN */
460 				partial += (uint8_t)*sbaddr << 8;
461 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
462 				*dbaddr++ = *sbaddr++;
463 				started_on_odd = !started_on_odd;
464 			}
465 
466 			if (needs_swap) {
467 				partial = (partial << 8) + (partial >> 24);
468 			}
469 			sum += (partial >> 16) + (partial & 0xffff);
470 			/*
471 			 * Reduce sum to allow potential byte swap
472 			 * in the next iteration without carry.
473 			 */
474 			sum = (sum >> 16) + (sum & 0xffff);
475 		}
476 
477 		sblen -= clen + odd;
478 		dlim -= clen + odd;
479 		len -= clen + odd;
480 
481 		if (sblen == 0) {
482 			sbaddr = NULL;
483 		}
484 
485 		if (dlim == 0) {
486 			dbaddr = NULL;
487 		}
488 	}
489 
490 	if (__probable(dbuf != NULL)) {
491 		__buflet_set_data_length(dbuf, (dlen0 - dlim));
492 	}
493 	if (__probable(do_csum)) {
494 		/* Final fold (reduce 32-bit to 16-bit) */
495 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
496 		sum = (sum >> 16) + (sum & 0xffff);
497 		*csum_partial = (uint32_t)sum;
498 	}
499 	return len == 0;
500 }
501 
502 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)503 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
504 {
505 	uint8_t odd = 0;
506 	uint32_t sum = 0, partial;
507 	boolean_t needs_swap, started_on_odd = FALSE;
508 	uint8_t *sbaddr = NULL;
509 	uint16_t clen, sblen, sbcnt, sboff;
510 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
511 	kern_buflet_t sbuf = NULL, sbufp = NULL;
512 
513 	sbcnt = __packet_get_buflet_count(sph);
514 
515 	/* fastpath (single buflet, even aligned, even length) */
516 	if (sbcnt == 1 && len != 0) {
517 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
518 		ASSERT(sbuf != NULL);
519 		sblen = __buflet_get_data_length(sbuf);
520 		sboff = __buflet_get_data_offset(sbuf);
521 		ASSERT(soff >= sboff);
522 		ASSERT(sboff + sblen > soff);
523 		sblen -= (soff - sboff);
524 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
525 
526 		clen = MIN(len, sblen);
527 
528 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
529 			sum = __packet_cksum(sbaddr, clen, 0);
530 			return __packet_fold_sum(sum);
531 		}
532 
533 		sbaddr = NULL;
534 		sbuf = sbufp = NULL;
535 	}
536 
537 	/* slowpath */
538 	while (len != 0) {
539 		ASSERT(sbaddr == NULL);
540 		if (sbaddr == NULL) {
541 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
542 			if (__improbable(sbuf == NULL)) {
543 				break;
544 			}
545 			sbufp = sbuf;
546 			sblen = __buflet_get_data_length(sbuf);
547 			sboff = __buflet_get_data_offset(sbuf);
548 			ASSERT(soff >= sboff);
549 			ASSERT(sboff + sblen > soff);
550 			sblen -= (soff - sboff);
551 			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
552 			soff = 0;
553 		}
554 
555 		clen = MIN(len, sblen);
556 
557 		partial = 0;
558 		if (__improbable((uintptr_t)sbaddr & 1)) {
559 			/* Align on word boundary */
560 			started_on_odd = !started_on_odd;
561 #if BYTE_ORDER == LITTLE_ENDIAN
562 			partial = (uint8_t)*sbaddr << 8;
563 #else /* BYTE_ORDER != LITTLE_ENDIAN */
564 			partial = (uint8_t)*sbaddr;
565 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
566 			clen -= 1;
567 			len -= 1;
568 		}
569 		needs_swap = started_on_odd;
570 
571 		odd = clen & 1u;
572 		clen -= odd;
573 
574 		if (clen != 0) {
575 			partial = __packet_cksum(sbaddr,
576 			    clen, partial);
577 		}
578 
579 		if (__improbable(partial & 0xc0000000)) {
580 			if (needs_swap) {
581 				partial = (partial << 8) +
582 				    (partial >> 24);
583 			}
584 			sum += (partial >> 16);
585 			sum += (partial & 0xffff);
586 			partial = 0;
587 		}
588 		sbaddr += clen;
589 
590 		if (odd != 0) {
591 #if BYTE_ORDER == LITTLE_ENDIAN
592 			partial += (uint8_t)*sbaddr;
593 #else /* BYTE_ORDER != LITTLE_ENDIAN */
594 			partial += (uint8_t)*sbaddr << 8;
595 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
596 			started_on_odd = !started_on_odd;
597 		}
598 
599 		if (needs_swap) {
600 			partial = (partial << 8) + (partial >> 24);
601 		}
602 		sum += (partial >> 16) + (partial & 0xffff);
603 		/*
604 		 * Reduce sum to allow potential byte swap
605 		 * in the next iteration without carry.
606 		 */
607 		sum = (sum >> 16) + (sum & 0xffff);
608 
609 		sblen -= clen + odd;
610 		len -= clen + odd;
611 
612 		if (sblen == 0) {
613 			sbaddr = NULL;
614 		}
615 	}
616 
617 	/* Final fold (reduce 32-bit to 16-bit) */
618 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
619 	sum = (sum >> 16) + (sum & 0xffff);
620 	return (uint32_t)sum;
621 }
622 
623 
624 /*
625  * This is a multi-buflet variant of pkt_copy_from_pkt().
626  *
627  * start/stuff is relative to soff, within [0, len], such that
628  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
629  */
630 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)631 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
632     const uint16_t doff, kern_packet_t sph, const uint16_t soff,
633     const uint32_t len, const boolean_t copysum, const uint16_t start,
634     const uint16_t stuff, const boolean_t invert)
635 {
636 	boolean_t rc;
637 	uint32_t partial;
638 	uint16_t csum = 0;
639 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
640 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
641 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
642 
643 	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
644 	    __packet_get_buflet_count(dph)));
645 
646 	switch (t) {
647 	case NR_RX:
648 		dpkt->pkt_csum_flags = 0;
649 		if (__probable(do_sum)) {
650 			/*
651 			 * copy the portion up to the point where we need to
652 			 * start the checksum, and copy the remainder,
653 			 * checksumming as we go.
654 			 */
655 			if (__probable(start != 0)) {
656 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
657 				    start, NULL, FALSE);
658 				ASSERT(rc);
659 			}
660 			_pkt_copypkt_sum(sph, (soff + start), dph,
661 			    (doff + start), (len - start), &partial, TRUE);
662 			csum = __packet_fold_sum(partial);
663 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
664 			    start, csum, FALSE);
665 			METADATA_ADJUST_LEN(dpkt, start, doff);
666 		} else {
667 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
668 			    FALSE);
669 			ASSERT(rc);
670 			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
671 			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
672 			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
673 		}
674 		break;
675 
676 	case NR_TX:
677 		if (__probable(copysum)) {
678 			uint8_t *baddr;
679 			/*
680 			 * copy the portion up to the point where we need to
681 			 * start the checksum, and copy the remainder,
682 			 * checksumming as we go.
683 			 */
684 			if (__probable(start != 0)) {
685 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
686 				    start, NULL, FALSE);
687 				ASSERT(rc);
688 			}
689 			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
690 			    (doff + start), (len - start), &partial, TRUE);
691 			ASSERT(rc);
692 			csum = __packet_fold_sum_final(partial);
693 
694 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
695 			if (csum == 0 && invert) {
696 				csum = 0xffff;
697 			}
698 
699 			/*
700 			 * Insert checksum into packet.
701 			 * Here we assume that checksum will be in the
702 			 * first buffer.
703 			 */
704 			ASSERT((stuff + doff + sizeof(csum)) <=
705 			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
706 			ASSERT(stuff <= (len - sizeof(csum)));
707 
708 			/* get first buflet buffer address from packet */
709 			MD_BUFLET_ADDR_ABS(dpkt, baddr);
710 			ASSERT(baddr != NULL);
711 			baddr += doff;
712 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
713 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
714 			} else {
715 				bcopy((void *)&csum, baddr + stuff,
716 				    sizeof(csum));
717 			}
718 			METADATA_ADJUST_LEN(dpkt, start, doff);
719 		} else {
720 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
721 			    FALSE);
722 			ASSERT(rc);
723 		}
724 		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
725 		dpkt->pkt_csum_tx_start_off = 0;
726 		dpkt->pkt_csum_tx_stuff_off = 0;
727 		break;
728 
729 	default:
730 		VERIFY(0);
731 		/* NOTREACHED */
732 		__builtin_unreachable();
733 	}
734 }
735 
736 /*
737  * This routine is used for copying an mbuf which originated in the host
738  * stack destined to a native skywalk interface (NR_TX), as well as for
739  * mbufs originating on compat network interfaces (NR_RX).
740  *
741  * start/stuff is relative to moff, within [0, len], such that
742  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
743  */
744 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)745 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
746     struct mbuf *m, const uint16_t moff, const uint32_t len,
747     const boolean_t copysum, const uint16_t start)
748 {
749 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
750 	uint32_t partial;
751 	uint16_t csum = 0;
752 	uint8_t *baddr;
753 
754 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
755 
756 	/* get buffer address from packet */
757 	MD_BUFLET_ADDR_ABS(pkt, baddr);
758 	ASSERT(baddr != NULL);
759 	baddr += poff;
760 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
761 
762 	switch (t) {
763 	case NR_RX:
764 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
765 		pkt->pkt_csum_rx_start_off = 0;
766 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
767 		pkt->pkt_svc_class = m_get_service_class(m);
768 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
769 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
770 			/*
771 			 * Use m_copydata() to copy the portion up to the
772 			 * point where we need to start the checksum, and
773 			 * copy the remainder, checksumming as we go.
774 			 */
775 			if (start != 0) {
776 				m_copydata(m, moff, start, baddr);
777 			}
778 			partial = m_copydata_sum(m, start, (len - start),
779 			    (baddr + start), 0, NULL);
780 			csum = __packet_fold_sum(partial);
781 
782 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
783 			    start, csum, FALSE);
784 		} else {
785 			m_copydata(m, moff, len, baddr);
786 		}
787 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
788 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
789 		    sk_proc_name_address(current_proc()),
790 		    sk_proc_pid(current_proc()), len,
791 		    (copysum ? (len - start) : 0), csum, start);
792 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
793 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
794 		    SK_KVA(m), m->m_pkthdr.csum_flags,
795 		    (uint32_t)m->m_pkthdr.csum_rx_start,
796 		    (uint32_t)m->m_pkthdr.csum_rx_val);
797 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
798 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
799 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
800 		    (uint32_t)pkt->pkt_csum_rx_start_off,
801 		    (uint32_t)pkt->pkt_csum_rx_value);
802 		break;
803 
804 	case NR_TX:
805 		if (__probable(copysum)) {
806 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
807 			/*
808 			 * Use m_copydata() to copy the portion up to the
809 			 * point where we need to start the checksum, and
810 			 * copy the remainder, checksumming as we go.
811 			 */
812 			if (start != 0) {
813 				m_copydata(m, moff, start, baddr);
814 			}
815 			partial = m_copydata_sum(m, start, (len - start),
816 			    (baddr + start), 0, NULL);
817 			csum = __packet_fold_sum_final(partial);
818 
819 			/*
820 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
821 			 * ideally we'd only test for CSUM_ZERO_INVERT
822 			 * here, but catch cases where the originator
823 			 * did not set it for UDP.
824 			 */
825 			if (csum == 0 && (m->m_pkthdr.csum_flags &
826 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
827 				csum = 0xffff;
828 			}
829 
830 			/* Insert checksum into packet */
831 			ASSERT(stuff <= (len - sizeof(csum)));
832 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
833 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
834 			} else {
835 				bcopy((void *)&csum, baddr + stuff,
836 				    sizeof(csum));
837 			}
838 		} else {
839 			m_copydata(m, moff, len, baddr);
840 		}
841 		pkt->pkt_csum_flags = 0;
842 		pkt->pkt_csum_tx_start_off = 0;
843 		pkt->pkt_csum_tx_stuff_off = 0;
844 
845 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
846 			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
847 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
848 		}
849 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
850 			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
851 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
852 		}
853 
854 		/* translate mbuf metadata */
855 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
856 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
857 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
858 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
859 		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
860 		switch (m->m_pkthdr.pkt_proto) {
861 		case IPPROTO_QUIC:
862 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
863 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
864 			break;
865 
866 		default:
867 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
868 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
869 			break;
870 		}
871 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
872 		pkt->pkt_svc_class = m_get_service_class(m);
873 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
874 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
875 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
876 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
877 		}
878 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
879 			pkt->pkt_pflags |= PKT_F_L4S;
880 		}
881 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
882 		pkt->pkt_policy_id =
883 		    (uint32_t)necp_get_policy_id_from_packet(m);
884 
885 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
886 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
887 				__packet_set_tx_completion_data(ph,
888 				    m->m_pkthdr.drv_tx_compl_arg,
889 				    m->m_pkthdr.drv_tx_compl_data);
890 			}
891 			pkt->pkt_tx_compl_context =
892 			    m->m_pkthdr.pkt_compl_context;
893 			pkt->pkt_tx_compl_callbacks =
894 			    m->m_pkthdr.pkt_compl_callbacks;
895 			/*
896 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
897 			 * mbuf can no longer trigger a completion callback.
898 			 * callback will be invoked when the kernel packet is
899 			 * completed.
900 			 */
901 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
902 
903 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
904 		}
905 
906 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
907 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
908 		    sk_proc_name_address(current_proc()),
909 		    sk_proc_pid(current_proc()), len,
910 		    (copysum ? (len - start) : 0), csum, start);
911 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
912 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
913 		    SK_KVA(m), m->m_pkthdr.csum_flags,
914 		    (uint32_t)m->m_pkthdr.csum_tx_start,
915 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
916 		break;
917 
918 	default:
919 		VERIFY(0);
920 		/* NOTREACHED */
921 		__builtin_unreachable();
922 	}
923 	METADATA_ADJUST_LEN(pkt, len, poff);
924 
925 	if (m->m_flags & M_BCAST) {
926 		__packet_set_link_broadcast(ph);
927 	} else if (m->m_flags & M_MCAST) {
928 		__packet_set_link_multicast(ph);
929 	}
930 
931 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
932 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
933 	    (t == NR_RX) ? "RX" : "TX",
934 	    sk_dump("buf", baddr, len, 128, NULL, 0));
935 }
936 
937 /*
938  * Like m_copydata_sum(), but works on a destination kernel packet.
939  */
940 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)941 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
942     uint32_t len, boolean_t do_cscum)
943 {
944 	boolean_t needs_swap, started_on_odd = FALSE;
945 	int off0 = soff;
946 	uint32_t len0 = len;
947 	struct mbuf *m0 = m;
948 	uint32_t sum = 0, partial;
949 	unsigned count0, count, odd, mlen_copied;
950 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
951 	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
952 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
953 	kern_buflet_t dbuf = NULL, dbufp = NULL;
954 
955 	while (soff > 0) {
956 		if (__improbable(m == NULL)) {
957 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
958 			    __func__, m0, off0, len0);
959 			/* NOTREACHED */
960 			__builtin_unreachable();
961 		}
962 		if (soff < m->m_len) {
963 			break;
964 		}
965 		soff -= m->m_len;
966 		m = m->m_next;
967 	}
968 
969 	if (__improbable(m == NULL)) {
970 		panic("%s: invalid mbuf chain %p [off %d, len %d]",
971 		    __func__, m0, off0, len0);
972 		/* NOTREACHED */
973 		__builtin_unreachable();
974 	}
975 
976 	sbaddr = mtod(m, uint8_t *) + soff;
977 	count = m->m_len - soff;
978 	mlen_copied = 0;
979 
980 	while (len != 0) {
981 		ASSERT(sbaddr == NULL || dbaddr == NULL);
982 		if (sbaddr == NULL) {
983 			soff = 0;
984 			m = m->m_next;
985 			if (__improbable(m == NULL)) {
986 				panic("%s: invalid mbuf chain %p [off %d, "
987 				    "len %d]", __func__, m0, off0, len0);
988 				/* NOTREACHED */
989 				__builtin_unreachable();
990 			}
991 			sbaddr = mtod(m, uint8_t *);
992 			count = m->m_len;
993 			mlen_copied = 0;
994 		}
995 
996 		if (__improbable(count == 0)) {
997 			sbaddr = NULL;
998 			continue;
999 		}
1000 
1001 		if (dbaddr == NULL) {
1002 			if (dbufp != NULL) {
1003 				__buflet_set_data_length(dbufp, dlen0);
1004 			}
1005 
1006 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1007 			if (__improbable(dbuf == NULL)) {
1008 				panic("%s: mbuf too large %p [off %d, "
1009 				    "len %d]", __func__, m0, off0, len0);
1010 				/* NOTREACHED */
1011 				__builtin_unreachable();
1012 			}
1013 			dbufp = dbuf;
1014 			dlim = __buflet_get_data_limit(dbuf) - doff;
1015 			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
1016 			dlen0 = dlim;
1017 			doff = 0;
1018 		}
1019 
1020 		count = MIN(count, (unsigned)len);
1021 		count0 = count = MIN(count, dlim);
1022 
1023 		if (!do_cscum) {
1024 			_pkt_copy(sbaddr, dbaddr, count);
1025 			sbaddr += count;
1026 			dbaddr += count;
1027 			goto skip_csum;
1028 		}
1029 
1030 		partial = 0;
1031 		if ((uintptr_t)sbaddr & 1) {
1032 			/* Align on word boundary */
1033 			started_on_odd = !started_on_odd;
1034 #if BYTE_ORDER == LITTLE_ENDIAN
1035 			partial = *sbaddr << 8;
1036 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1037 			partial = *sbaddr;
1038 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1039 			*dbaddr++ = *sbaddr++;
1040 			count -= 1;
1041 		}
1042 
1043 		needs_swap = started_on_odd;
1044 		odd = count & 1u;
1045 		count -= odd;
1046 
1047 		if (count) {
1048 			partial = __packet_copy_and_sum(sbaddr,
1049 			    dbaddr, count, partial);
1050 			sbaddr += count;
1051 			dbaddr += count;
1052 			if (__improbable(partial & 0xc0000000)) {
1053 				if (needs_swap) {
1054 					partial = (partial << 8) +
1055 					    (partial >> 24);
1056 				}
1057 				sum += (partial >> 16);
1058 				sum += (partial & 0xffff);
1059 				partial = 0;
1060 			}
1061 		}
1062 
1063 		if (odd) {
1064 #if BYTE_ORDER == LITTLE_ENDIAN
1065 			partial += *sbaddr;
1066 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1067 			partial += *sbaddr << 8;
1068 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1069 			*dbaddr++ = *sbaddr++;
1070 			started_on_odd = !started_on_odd;
1071 		}
1072 
1073 		if (needs_swap) {
1074 			partial = (partial << 8) + (partial >> 24);
1075 		}
1076 		sum += (partial >> 16) + (partial & 0xffff);
1077 		/*
1078 		 * Reduce sum to allow potential byte swap
1079 		 * in the next iteration without carry.
1080 		 */
1081 		sum = (sum >> 16) + (sum & 0xffff);
1082 
1083 skip_csum:
1084 		dlim -= count0;
1085 		len -= count0;
1086 		mlen_copied += count0;
1087 
1088 		if (dlim == 0) {
1089 			dbaddr = NULL;
1090 		}
1091 
1092 		count = m->m_len - soff - mlen_copied;
1093 		if (count == 0) {
1094 			sbaddr = NULL;
1095 		}
1096 	}
1097 
1098 	ASSERT(len == 0);
1099 	ASSERT(dbuf != NULL);
1100 	__buflet_set_data_length(dbuf, (dlen0 - dlim));
1101 
1102 	if (!do_cscum) {
1103 		return 0;
1104 	}
1105 
1106 	/* Final fold (reduce 32-bit to 16-bit) */
1107 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1108 	sum = (sum >> 16) + (sum & 0xffff);
1109 	return sum;
1110 }
1111 
/*
 * This is a multi-buflet variant of pkt_copy_from_mbuf().
 *
 * Copies "len" bytes at offset "moff" of mbuf "m" into kernel packet "ph"
 * at offset "poff" (spanning the packet's buflet chain), optionally
 * computing a partial Internet checksum over the copied bytes, and
 * translates the relevant mbuf metadata into packet metadata (TX only).
 *
 * start/stuff is relative to moff, within [0, len], such that
 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
 */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* copy must fit within the packet's aggregate buflet capacity */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		/* propagate the mbuf's RX checksum state to the packet */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/*
		 * Compute a partial sum in software unless the mbuf already
		 * carries a full hardware-verified RX checksum.
		 */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the checksummed copy reads from
			 * mbuf offset "start", not "moff + start"; this
			 * appears to assume moff == 0 here — confirm.
			 */
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum was finalized above (or not requested) */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO requests (IPv4/IPv6 mutually exclusive) */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		/* transfer TX-completion responsibility to the packet */
		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* carry link-layer cast type over to the packet */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1311 
/*
 * This routine is used for copying from a packet originating from a native
 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
 * as well as for mbufs destined for the compat network interfaces (NR_TX).
 *
 * Note that this routine does not alter m_data pointer of the mbuf, as the
 * caller may want to use the original value upon return.  We do, however,
 * adjust the length to reflect the total data span.
 *
 * This routine supports copying into an mbuf chain for RX but not TX.
 *
 * start/stuff is relative to poff, within [0, len], such that
 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
 */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;
	uint32_t partial = 0;
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;
	uint8_t *baddr;
	uint8_t *dp;
	/* software sum needed only if no full checksum is already present */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet variant: copy must fit in the default buffer */
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	ASSERT((m->m_flags & M_PKTHDR));
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/*
		 * Use pkt_copy() to copy the portion up to the
		 * point where we need to start the checksum, and
		 * copy the remainder, checksumming as we go.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* walk the mbuf chain, filling each mbuf's trailing space */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
			if (__probable(do_sum)) {
				partial = __packet_copy_and_sum((baddr + copied_len),
				    dp, tmp_len, partial);
			} else {
				_pkt_copy((baddr + copied_len), dp, tmp_len);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* record software-computed partial sum in the mbuf */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* pass through the packet's existing checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		dp = (uint8_t *)m->m_data;
		/* TX path copies into a single mbuf only (no chain) */
		ASSERT(m->m_next == NULL);

		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		/* checksum is finalized; clear TX offload state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}
		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* carry link-layer cast type over to the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1521 
/*
 * This is a multi-buflet variant of pkt_copy_to_mbuf().
 * NOTE: poff is the offset within the packet.
 *
 * This routine supports copying into an mbuf chain for RX but not TX.
 *
 * start/stuff is relative to poff, within [0, len], such that
 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
 */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	struct mbuf *curr_m;
	uint32_t partial = 0;
	uint32_t remaining_len = len, copied_len = 0;
	uint16_t csum = 0;
	uint8_t *baddr;
	uint8_t *dp;
	/* software sum needed only if no full checksum is already present */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);

	ASSERT(len >= start);
	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* copy must fit within the packet's aggregate buflet capacity */
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(ph)));

	ASSERT((m->m_flags & M_PKTHDR));
	m->m_data += moff;

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		/*
		 * Copy the pre-checksum portion (first buflet only) before
		 * the checksummed remainder below.
		 */
		if (__probable(do_sum && start != 0)) {
			ASSERT(M_TRAILINGSPACE(m) >= start);
			ASSERT(m->m_len == 0);
			dp = (uint8_t *)m->m_data;
			_pkt_copy(baddr, dp, start);
			remaining_len -= start;
			copied_len += start;
			m->m_len += start;
			m->m_pkthdr.len += start;
		}
		/* walk the mbuf chain, filling each mbuf's trailing space */
		curr_m = m;
		while (curr_m != NULL && remaining_len != 0) {
			uint32_t tmp_len = MIN(remaining_len,
			    (uint32_t)M_TRAILINGSPACE(curr_m));
			uint16_t soff = poff + (uint16_t)copied_len;
			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;

			/*
			 * NOTE(review): non-checksum path calls
			 * pkt_copyaddr_sum() (no leading underscore) while
			 * the checksum path calls _pkt_copyaddr_sum();
			 * confirm both resolve to the intended helper.
			 */
			if (__probable(do_sum)) {
				partial = _pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, TRUE, partial, NULL);
			} else {
				pkt_copyaddr_sum(ph, soff,
				    dp, tmp_len, FALSE, 0, NULL);
			}

			curr_m->m_len += tmp_len;
			m->m_pkthdr.len += tmp_len;
			copied_len += tmp_len;
			remaining_len -= tmp_len;
			curr_m = curr_m->m_next;
		}
		ASSERT(remaining_len == 0);

		if (__probable(do_sum)) {
			/* record software-computed partial sum in the mbuf */
			csum = __packet_fold_sum(partial);

			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			/* pass through the packet's existing checksum state */
			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
			}
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;
	case NR_TX:
		dp = (uint8_t *)m->m_data;
		/* TX path copies into a single mbuf only (no chain) */
		ASSERT(m->m_next == NULL);
		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
		    (uint32_t)mbuf_maxlen(m));
		m->m_len += len;
		m->m_pkthdr.len += len;
		VERIFY(m->m_len == m->m_pkthdr.len &&
		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset; byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum is finalized; clear TX offload state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}
		if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
			m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* carry link-layer cast type over to the mbuf */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1721 
/*
 * Like m_copydata(), but computes 16-bit sum as the data is copied.
 * Caller can provide an initial sum to be folded into the computed
 * sum.  The accumulated partial sum (32-bit) is returned to caller;
 * caller is responsible for further reducing it to 16-bit if needed,
 * as well as to perform the final 1's complement on it.
 *
 * "odd_start" (optional) carries byte-alignment parity across calls:
 * on entry it seeds started_on_odd, on return it holds the final
 * parity, so a caller can checksum one logical span in pieces.
 * Panics on a negative off/len or an mbuf chain shorter than off + len.
 */
uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
    boolean_t *odd_start)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = off, len0 = len;
	struct mbuf *m0 = m;
	uint64_t sum, partial;
	unsigned count, odd;
	char *cp = vp;

	if (__improbable(off < 0 || len < 0)) {
		panic("%s: invalid offset %d or len %d", __func__, off, len);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* skip over leading mbufs that lie entirely before "off" */
	while (off > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (off < m->m_len) {
			break;
		}
		off -= m->m_len;
		m = m->m_next;
	}

	if (odd_start) {
		started_on_odd = *odd_start;
	}
	sum = initial_sum;

	for (; len > 0; m = m->m_next) {
		uint8_t *datap;

		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		datap = mtod(m, uint8_t *) + off;
		count = m->m_len;

		if (__improbable(count == 0)) {
			/* zero-length mbuf; skip it */
			continue;
		}

		count = MIN(count - off, (unsigned)len);
		partial = 0;

		if ((uintptr_t)datap & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *datap << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *datap;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			count -= 1;
			len -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			partial = __packet_copy_and_sum(datap,
			    cp, count, (uint32_t)partial);
			datap += count;
			cp += count;
			len -= count;
			if (__improbable((partial & (3ULL << 62)) != 0)) {
				/*
				 * Flush the accumulator into "sum" before
				 * a byte swap could overflow it.
				 */
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}

		if (odd) {
			/* fold in the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *datap;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *datap << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			len -= 1;
			started_on_odd = !started_on_odd;
		}
		off = 0;

		if (needs_swap) {
			/*
			 * NOTE(review): "partial" is 64-bit here but this
			 * swap uses the 32-bit rotate constant (>> 24),
			 * unlike the flush path above (>> 56); appears to
			 * rely on partial fitting in 32 bits at this point
			 * — confirm intended.
			 */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	if (odd_start) {
		*odd_start = started_on_odd;
	}

	/* Final fold (reduce 64-bit to 32-bit) */
	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */

	/* return 32-bit partial sum to caller */
	return (uint32_t)sum;
}
1853 
#if DEBUG || DEVELOPMENT
#define TRAILERS_MAX    16              /* max trailing bytes */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
/*
 * Counter starts at the threshold so the very first pkt_add_trailers*
 * call regenerates tb before it is ever copied out.
 */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1859 
1860 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1861 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1862 {
1863 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1864 	uint32_t extra;
1865 	uint8_t *baddr;
1866 
1867 	/* get buffer address from packet */
1868 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1869 	ASSERT(baddr != NULL);
1870 	ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1871 
1872 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1873 	if (extra == 0 || extra > sizeof(tb) ||
1874 	    (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1875 		return 0;
1876 	}
1877 
1878 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1879 	if (regen++ == TRAILERS_REGEN) {
1880 		read_frandom(&tb[0], sizeof(tb));
1881 		regen = 0;
1882 	}
1883 
1884 	bcopy(&tb[0], (baddr + len), extra);
1885 
1886 	/* recompute partial sum (also to exercise related logic) */
1887 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1888 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1889 	    ((len + extra) - start), 0);
1890 	pkt->pkt_csum_rx_start_off = start;
1891 
1892 	return extra;
1893 }
1894 
1895 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1896 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1897 {
1898 	uint32_t extra;
1899 
1900 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1901 	if (extra == 0 || extra > sizeof(tb)) {
1902 		return 0;
1903 	}
1904 
1905 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1906 		return 0;
1907 	}
1908 
1909 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1910 	if (regen++ == TRAILERS_REGEN) {
1911 		read_frandom(&tb[0], sizeof(tb));
1912 		regen = 0;
1913 	}
1914 
1915 	/* recompute partial sum (also to exercise related logic) */
1916 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1917 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1918 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1919 	m->m_pkthdr.csum_rx_start = start;
1920 
1921 	return extra;
1922 }
1923 #endif /* DEBUG || DEVELOPMENT */
1924 
/*
 * Copy len bytes from packet sph at offset soff into packet dph at
 * offset doff, optionally folding a checksum (do_csum) into *partial.
 * Thin wrapper around _pkt_copypkt_sum(); asserts (via VERIFY) that
 * the underlying copy reported success.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
1931 
/*
 * Copy len bytes from packet sph at offset soff into the buffer at
 * dbaddr, optionally checksumming (do_csum) starting from initial_sum.
 * odd_start carries odd-byte alignment state across calls.  Thin
 * wrapper around _pkt_copyaddr_sum(); returns its partial sum.
 */
uint32_t
pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
}
1938 
1939 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1940 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1941     uint16_t len, boolean_t do_cscum)
1942 {
1943 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1944 }
1945 
1946 void
pkt_copy(void * src,void * dst,size_t len)1947 pkt_copy(void *src, void *dst, size_t len)
1948 {
1949 	return _pkt_copy(src, dst, len);
1950 }
1951