xref: /xnu-8792.61.2/bsd/skywalk/packet/packet_copy.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2017-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
#if (DEVELOPMENT || DEBUG)
/* kern.skywalk.packet sysctl subtree (development/debug kernels only) */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0; /* for testing trailing bytes */
/* kern.skywalk.packet.trailers: number of trailing bytes to append (test knob) */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* DEVELOPMENT || DEBUG */
40 
41 
42 __attribute__((always_inline))
43 static inline void
_pkt_copy(void * src,void * dst,size_t len)44 _pkt_copy(void *src, void *dst, size_t len)
45 {
46 	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
47 		switch (len) {
48 		case 20:        /* standard IPv4 header */
49 			sk_copy64_20(src, dst);
50 			return;
51 
52 		case 40:        /* IPv6 header */
53 			sk_copy64_40(src, dst);
54 			return;
55 
56 		default:
57 			if (IS_P2ALIGNED(len, 64)) {
58 				sk_copy64_64x(src, dst, len);
59 				return;
60 			} else if (IS_P2ALIGNED(len, 32)) {
61 				sk_copy64_32x(src, dst, len);
62 				return;
63 			} else if (IS_P2ALIGNED(len, 8)) {
64 				sk_copy64_8x(src, dst, len);
65 				return;
66 			} else if (IS_P2ALIGNED(len, 4)) {
67 				sk_copy64_4x(src, dst, len);
68 				return;
69 			}
70 			break;
71 		}
72 	}
73 	bcopy(src, dst, len);
74 }
75 
76 /*
77  * This routine is used for copying data across two kernel packets.
78  * Can also optionally compute 16-bit partial inet checksum as the
79  * data is copied.
80  * This routine is used by flowswitch while copying packet from vp
81  * adapter pool to packet in native netif pool and vice versa.
82  *
83  * start/stuff is relative to soff, within [0, len], such that
84  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85  */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;
	/*
	 * Only compute a checksum here when requested AND the source
	 * packet doesn't already carry full hardware checksum flags.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* RX keeps a partial (unfinalized) 16-bit sum */
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			/* source already has a usable RX checksum; propagate it */
			_pkt_copy(sbaddr, dbaddr, len);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* TX finalizes (one's complements) the sum for wire format */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: store byte-wise */
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* only TSO-related flags survive the copy on TX */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
200 
201 /*
202  * NOTE: soff is the offset within the packet
203  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
204  * caller is responsible for further reducing it to 16-bit if needed,
205  * as well as to perform the final 1's complement on it.
206  */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;	/* original length, kept for panic() diagnostics */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry in odd/even byte phase from a previous segment, if provided */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		clen = (uint16_t)MIN(len, sblen);

		/* 2-byte-aligned source with even length: sum in one shot */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* fastpath conditions not met; reset and use generic loop */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	/* generic path: walk source buflets, copying + summing each chunk */
	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
		soff = 0;	/* soff applies only to the first buflet */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
			}
			/* odd phase means 16-bit words are byte-swapped */
			needs_swap = started_on_odd;

			/* peel off a trailing odd byte; handled after the bulk sum */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/*
			 * Fold partial into sum early if its high bits are
			 * nearly full, so the byte swap below can't overflow.
			 */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the trailing odd byte and flip phase */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		len -= clen + odd;
		/* either this buflet is exhausted or the request is done */
		ASSERT(sblen == 0 || len == 0);
	}

	/* hand the odd/even phase back to the caller for the next segment */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
343 
344 /*
345  * NOTE: Caller of this function is responsible to adjust the length and offset
346  * of the first buflet of the destination packet if (doff != 0),
346  * i.e. additional data is being prepended to the packet.
348  * It should also finalize the packet.
349  * To simplify & optimize the routine, we have also assumed that soff & doff
350  * will lie within the first buffer, which is true for the current use cases
351  * where, doff is the offset of the checksum field in the TCP/IP header and
352  * soff is the L3 offset.
353  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
354  * caller is responsible for further reducing it to 16-bit if needed,
355  * as well as to perform the final 1's complement on it.
356  */
static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
{
	uint8_t odd = 0;
	uint32_t sum = 0, partial;
	boolean_t needs_swap, started_on_odd = FALSE;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;

	ASSERT(csum_partial != NULL || !do_csum);
	sbcnt = __packet_get_buflet_count(sph);
	dbcnt = __packet_get_buflet_count(dph);

	/*
	 * Dual-cursor walk: advance the source buflet and destination
	 * buflet independently; a NULL address means that side needs
	 * its next buflet.
	 */
	while (len != 0) {
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		if (sbaddr == NULL) {
			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
			if (__improbable(sbuf == NULL)) {
				/* ran out of source buflets; len != 0 on exit */
				break;
			}
			sbufp = sbuf;
			sblen = __buflet_get_data_length(sbuf);
			sboff = __buflet_get_data_offset(sbuf);
			ASSERT(soff >= sboff);
			ASSERT(sboff + sblen > soff);
			sblen -= (soff - sboff);
			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
			soff = 0;	/* soff applies only to the first buflet */
		}

		if (dbaddr == NULL) {
			/* finalize the length of the destination buflet just filled */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				/* ran out of destination buflets; len != 0 on exit */
				break;
			}
			dbufp = dbuf;
			dlim = __buflet_get_data_limit(dbuf);
			ASSERT(dlim > doff);
			dlim -= doff;
			if (doff != 0) {
				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
			}
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;	/* capacity of this buflet, for final length calc */
			doff = 0;	/* doff applies only to the first buflet */
		}

		/* chunk size: bounded by request, source data, and dest space */
		clen = (uint16_t)MIN(len, sblen);
		clen = MIN(clen, dlim);

		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				clen -= 1;
				dlim -= 1;
				len -= 1;
			}
			/* odd phase means 16-bit words are byte-swapped */
			needs_swap = started_on_odd;

			/* peel off a trailing odd byte; handled after the bulk sum */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/*
			 * Fold partial into sum early if its high bits are
			 * nearly full, so the byte swap below can't overflow.
			 */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, clen);
		}
		sbaddr += clen;
		dbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the trailing odd byte and flip phase */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		dlim -= clen + odd;
		len -= clen + odd;

		/* exhausted cursors force the next buflet fetch above */
		if (sblen == 0) {
			sbaddr = NULL;
		}

		if (dlim == 0) {
			dbaddr = NULL;
		}
	}

	/* set the consumed length of the last (possibly partial) dest buflet */
	if (__probable(dbuf != NULL)) {
		__buflet_set_data_length(dbuf, (dlen0 - dlim));
	}
	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
		*csum_partial = (uint32_t)sum;
	}
	/* TRUE iff the full requested length was copied */
	return len == 0;
}
501 
/*
 * Compute a 16-bit-folded inet checksum over len bytes of the packet,
 * starting at packet offset soff, without copying any data.  Walks the
 * packet's buflet chain; returns the folded sum (not one's complemented).
 */
uint32_t
pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
{
	uint8_t odd = 0;
	uint32_t sum = 0, partial;
	boolean_t needs_swap, started_on_odd = FALSE;
	uint8_t *sbaddr = NULL;
	uint16_t clen, sblen, sbcnt, sboff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* fastpath (single buflet, even aligned, even length) */
	if (sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sblen = __buflet_get_data_length(sbuf);
		sboff = __buflet_get_data_offset(sbuf);
		ASSERT(soff >= sboff);
		ASSERT(sboff + sblen > soff);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		clen = MIN(len, sblen);

		/* 2-byte-aligned start with even length: sum in one shot */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_cksum(sbaddr, clen, 0);
			return __packet_fold_sum(sum);
		}

		/* fastpath conditions not met; reset and use generic loop */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	/* slowpath */
	while (len != 0) {
		ASSERT(sbaddr == NULL);
		if (sbaddr == NULL) {
			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
			if (__improbable(sbuf == NULL)) {
				/* ran out of buflets before len was consumed */
				break;
			}
			sbufp = sbuf;
			sblen = __buflet_get_data_length(sbuf);
			sboff = __buflet_get_data_offset(sbuf);
			ASSERT(soff >= sboff);
			ASSERT(sboff + sblen > soff);
			sblen -= (soff - sboff);
			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
			soff = 0;	/* soff applies only to the first buflet */
		}

		clen = MIN(len, sblen);

		partial = 0;
		if (__improbable((uintptr_t)sbaddr & 1)) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			clen -= 1;
			len -= 1;
		}
		/* odd phase means 16-bit words are byte-swapped */
		needs_swap = started_on_odd;

		/* peel off a trailing odd byte; handled after the bulk sum */
		odd = clen & 1u;
		clen -= odd;

		if (clen != 0) {
			partial = __packet_cksum(sbaddr,
			    clen, partial);
		}

		/*
		 * Fold partial into sum early if its high bits are
		 * nearly full, so the byte swap below can't overflow.
		 */
		if (__improbable(partial & 0xc0000000)) {
			if (needs_swap) {
				partial = (partial << 8) +
				    (partial >> 24);
			}
			sum += (partial >> 16);
			sum += (partial & 0xffff);
			partial = 0;
		}
		sbaddr += clen;

		if (odd != 0) {
			/* fold in the trailing odd byte and flip phase */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

		sblen -= clen + odd;
		len -= clen + odd;

		/* exhausted buflet forces the next fetch above */
		if (sblen == 0) {
			sbaddr = NULL;
		}
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return (uint32_t)sum;
}
622 
623 
624 /*
625  * This is a multi-buflet variant of pkt_copy_from_pkt().
626  *
627  * start/stuff is relative to soff, within [0, len], such that
628  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
629  */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	/*
	 * Only compute a checksum here when requested AND the source
	 * packet doesn't already carry full hardware checksum flags.
	 */
	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);

	/* destination capacity spans all of its buflets */
	VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(do_sum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			/*
			 * NOTE(review): return value of this copy is not
			 * checked, unlike the TX path below — confirm
			 * whether a short copy is impossible here.
			 */
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * NOTE(review): adjusts by 'start' only; presumably
			 * the summed portion's buflet lengths are set inside
			 * _pkt_copypkt_sum — verify.
			 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			/* plain copy; propagate the source's RX checksum state */
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
			dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
			dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
		}
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			/* TX finalizes (one's complements) the sum for wire format */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: store byte-wise */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* only TSO-related flags survive the copy on TX */
		dpkt->pkt_csum_flags = spkt->pkt_csum_flags & (PACKET_CSUM_TSO_FLAGS);
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
735 
736 /*
737  * This routine is used for copying an mbuf which originated in the host
738  * stack destined to a native skywalk interface (NR_TX), as well as for
739  * mbufs originating on compat network interfaces (NR_RX).
740  *
741  * start/stuff is relative to moff, within [0, len], such that
742  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
743  */
void
pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));

	switch (t) {
	case NR_RX:
		/* inherit the mbuf's RX checksum state and service class */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/* sum ourselves only if the mbuf lacks a full RX checksum */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			m_copydata(m, moff, len, baddr);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/* checksum insertion point comes from the mbuf header */
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			/* TX finalizes (one's complements) the sum for wire format */
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: store byte-wise */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
		} else {
			m_copydata(m, moff, len, baddr);
		}
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* carry over TSO markers; IPv4 and IPv6 are mutually exclusive */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
		}
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
		}

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
			pkt->pkt_pflags |= PKT_F_L4S;
		}
		/* carry NECP policy attribution across the copy */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	METADATA_ADJUST_LEN(pkt, len, poff);

	/* preserve link-layer cast type */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
936 
937 /*
938  * Like m_copydata_sum(), but works on a destination kernel packet.
939  */
940 static inline uint32_t
m_copypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint32_t len,boolean_t do_cscum)941 m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
942     uint32_t len, boolean_t do_cscum)
943 {
944 	boolean_t needs_swap, started_on_odd = FALSE;
945 	int off0 = soff;
946 	uint32_t len0 = len;
947 	struct mbuf *m0 = m;
948 	uint32_t sum = 0, partial;
949 	unsigned count0, count, odd, mlen_copied;
950 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
951 	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
952 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
953 	kern_buflet_t dbuf = NULL, dbufp = NULL;
954 
955 	while (soff > 0) {
956 		if (__improbable(m == NULL)) {
957 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
958 			    __func__, m0, off0, len0);
959 			/* NOTREACHED */
960 			__builtin_unreachable();
961 		}
962 		if (soff < m->m_len) {
963 			break;
964 		}
965 		soff -= m->m_len;
966 		m = m->m_next;
967 	}
968 
969 	if (__improbable(m == NULL)) {
970 		panic("%s: invalid mbuf chain %p [off %d, len %d]",
971 		    __func__, m0, off0, len0);
972 		/* NOTREACHED */
973 		__builtin_unreachable();
974 	}
975 
976 	sbaddr = mtod(m, uint8_t *) + soff;
977 	count = m->m_len - soff;
978 	mlen_copied = 0;
979 
980 	while (len != 0) {
981 		ASSERT(sbaddr == NULL || dbaddr == NULL);
982 		if (sbaddr == NULL) {
983 			soff = 0;
984 			m = m->m_next;
985 			if (__improbable(m == NULL)) {
986 				panic("%s: invalid mbuf chain %p [off %d, "
987 				    "len %d]", __func__, m0, off0, len0);
988 				/* NOTREACHED */
989 				__builtin_unreachable();
990 			}
991 			sbaddr = mtod(m, uint8_t *);
992 			count = m->m_len;
993 			mlen_copied = 0;
994 		}
995 
996 		if (__improbable(count == 0)) {
997 			sbaddr = NULL;
998 			continue;
999 		}
1000 
1001 		if (dbaddr == NULL) {
1002 			if (dbufp != NULL) {
1003 				__buflet_set_data_length(dbufp, dlen0);
1004 			}
1005 
1006 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1007 			if (__improbable(dbuf == NULL)) {
1008 				panic("%s: mbuf too large %p [off %d, "
1009 				    "len %d]", __func__, m0, off0, len0);
1010 				/* NOTREACHED */
1011 				__builtin_unreachable();
1012 			}
1013 			dbufp = dbuf;
1014 			dlim = __buflet_get_data_limit(dbuf) - doff;
1015 			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
1016 			dlen0 = dlim;
1017 			doff = 0;
1018 		}
1019 
1020 		count = MIN(count, (unsigned)len);
1021 		count0 = count = MIN(count, dlim);
1022 
1023 		if (!do_cscum) {
1024 			_pkt_copy(sbaddr, dbaddr, count);
1025 			sbaddr += count;
1026 			dbaddr += count;
1027 			goto skip_csum;
1028 		}
1029 
1030 		partial = 0;
1031 		if ((uintptr_t)sbaddr & 1) {
1032 			/* Align on word boundary */
1033 			started_on_odd = !started_on_odd;
1034 #if BYTE_ORDER == LITTLE_ENDIAN
1035 			partial = *sbaddr << 8;
1036 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1037 			partial = *sbaddr;
1038 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1039 			*dbaddr++ = *sbaddr++;
1040 			count -= 1;
1041 		}
1042 
1043 		needs_swap = started_on_odd;
1044 		odd = count & 1u;
1045 		count -= odd;
1046 
1047 		if (count) {
1048 			partial = __packet_copy_and_sum(sbaddr,
1049 			    dbaddr, count, partial);
1050 			sbaddr += count;
1051 			dbaddr += count;
1052 			if (__improbable(partial & 0xc0000000)) {
1053 				if (needs_swap) {
1054 					partial = (partial << 8) +
1055 					    (partial >> 24);
1056 				}
1057 				sum += (partial >> 16);
1058 				sum += (partial & 0xffff);
1059 				partial = 0;
1060 			}
1061 		}
1062 
1063 		if (odd) {
1064 #if BYTE_ORDER == LITTLE_ENDIAN
1065 			partial += *sbaddr;
1066 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1067 			partial += *sbaddr << 8;
1068 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1069 			*dbaddr++ = *sbaddr++;
1070 			started_on_odd = !started_on_odd;
1071 		}
1072 
1073 		if (needs_swap) {
1074 			partial = (partial << 8) + (partial >> 24);
1075 		}
1076 		sum += (partial >> 16) + (partial & 0xffff);
1077 		/*
1078 		 * Reduce sum to allow potential byte swap
1079 		 * in the next iteration without carry.
1080 		 */
1081 		sum = (sum >> 16) + (sum & 0xffff);
1082 
1083 skip_csum:
1084 		dlim -= count0;
1085 		len -= count0;
1086 		mlen_copied += count0;
1087 
1088 		if (dlim == 0) {
1089 			dbaddr = NULL;
1090 		}
1091 
1092 		count = m->m_len - soff - mlen_copied;
1093 		if (count == 0) {
1094 			sbaddr = NULL;
1095 		}
1096 	}
1097 
1098 	ASSERT(len == 0);
1099 	ASSERT(dbuf != NULL);
1100 	__buflet_set_data_length(dbuf, (dlen0 - dlim));
1101 
1102 	if (!do_cscum) {
1103 		return 0;
1104 	}
1105 
1106 	/* Final fold (reduce 32-bit to 16-bit) */
1107 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1108 	sum = (sum >> 16) + (sum & 0xffff);
1109 	return sum;
1110 }
1111 
1112 /*
1113  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1114  *
1115  * start/stuff is relative to moff, within [0, len], such that
1116  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1117  */
1118 void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1119 pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1120     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1121     const uint32_t len, const boolean_t copysum, const uint16_t start)
1122 {
1123 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1124 	uint32_t partial;
1125 	uint16_t csum = 0;
1126 	uint8_t *baddr;
1127 
1128 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1129 
1130 	/* get buffer address from packet */
1131 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1132 	ASSERT(baddr != NULL);
1133 	baddr += poff;
1134 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1135 	    __packet_get_buflet_count(ph)));
1136 
1137 	switch (t) {
1138 	case NR_RX:
1139 		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1140 		pkt->pkt_csum_rx_start_off = 0;
1141 		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1142 		pkt->pkt_svc_class = m_get_service_class(m);
1143 		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1144 		    != CSUM_RX_FULL_FLAGS) && copysum)) {
1145 			/*
1146 			 * Use m_copydata() to copy the portion up to the
1147 			 * point where we need to start the checksum, and
1148 			 * copy the remainder, checksumming as we go.
1149 			 */
1150 			if (start != 0) {
1151 				m_copydata(m, moff, start, baddr);
1152 			}
1153 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1154 			    (len - start), TRUE);
1155 			csum = __packet_fold_sum(partial);
1156 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1157 			    start, csum, FALSE);
1158 			METADATA_ADJUST_LEN(pkt, start, poff);
1159 		} else {
1160 			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1161 		}
1162 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1163 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1164 		    sk_proc_name_address(current_proc()),
1165 		    sk_proc_pid(current_proc()), len,
1166 		    (copysum ? (len - start) : 0), csum, start);
1167 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1168 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
1169 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1170 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1171 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1172 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1173 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1174 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1175 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1176 		    (uint32_t)pkt->pkt_csum_rx_value);
1177 		break;
1178 
1179 	case NR_TX:
1180 		if (__probable(copysum)) {
1181 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1182 			/*
1183 			 * Use m_copydata() to copy the portion up to the
1184 			 * point where we need to start the checksum, and
1185 			 * copy the remainder, checksumming as we go.
1186 			 */
1187 			if (start != 0) {
1188 				m_copydata(m, moff, start, baddr);
1189 			}
1190 			partial = m_copypkt_sum(m, start, ph, (poff + start),
1191 			    (len - start), TRUE);
1192 			csum = __packet_fold_sum_final(partial);
1193 
1194 			/*
1195 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1196 			 * ideally we'd only test for CSUM_ZERO_INVERT
1197 			 * here, but catch cases where the originator
1198 			 * did not set it for UDP.
1199 			 */
1200 			if (csum == 0 && (m->m_pkthdr.csum_flags &
1201 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1202 				csum = 0xffff;
1203 			}
1204 
1205 			/* Insert checksum into packet */
1206 			ASSERT(stuff <= (len - sizeof(csum)));
1207 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1208 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1209 			} else {
1210 				bcopy((void *)&csum, baddr + stuff,
1211 				    sizeof(csum));
1212 			}
1213 			METADATA_ADJUST_LEN(pkt, start, poff);
1214 		} else {
1215 			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
1216 		}
1217 		pkt->pkt_csum_flags = 0;
1218 		pkt->pkt_csum_tx_start_off = 0;
1219 		pkt->pkt_csum_tx_stuff_off = 0;
1220 
1221 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1222 			pkt->pkt_csum_flags |= PACKET_TSO_IPV4;
1223 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1224 		}
1225 		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1226 			pkt->pkt_csum_flags |= PACKET_TSO_IPV6;
1227 			ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1228 		}
1229 
1230 		/* translate mbuf metadata */
1231 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1232 		pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1233 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1234 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1235 		pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1236 		switch (m->m_pkthdr.pkt_proto) {
1237 		case IPPROTO_QUIC:
1238 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1239 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
1240 			break;
1241 
1242 		default:
1243 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1244 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1245 			break;
1246 		}
1247 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
1248 		pkt->pkt_svc_class = m_get_service_class(m);
1249 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1250 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1251 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1252 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1253 		}
1254 		if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1255 			pkt->pkt_pflags |= PKT_F_L4S;
1256 		}
1257 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
1258 		pkt->pkt_policy_id =
1259 		    (uint32_t)necp_get_policy_id_from_packet(m);
1260 
1261 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1262 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1263 				__packet_set_tx_completion_data(ph,
1264 				    m->m_pkthdr.drv_tx_compl_arg,
1265 				    m->m_pkthdr.drv_tx_compl_data);
1266 			}
1267 			pkt->pkt_tx_compl_context =
1268 			    m->m_pkthdr.pkt_compl_context;
1269 			pkt->pkt_tx_compl_callbacks =
1270 			    m->m_pkthdr.pkt_compl_callbacks;
1271 			/*
1272 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1273 			 * mbuf can no longer trigger a completion callback.
1274 			 * callback will be invoked when the kernel packet is
1275 			 * completed.
1276 			 */
1277 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1278 
1279 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1280 		}
1281 
1282 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1283 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1284 		    sk_proc_name_address(current_proc()),
1285 		    sk_proc_pid(current_proc()), len,
1286 		    (copysum ? (len - start) : 0), csum, start);
1287 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1288 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
1289 		    SK_KVA(m), m->m_pkthdr.csum_flags,
1290 		    (uint32_t)m->m_pkthdr.csum_tx_start,
1291 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
1292 		break;
1293 
1294 	default:
1295 		VERIFY(0);
1296 		/* NOTREACHED */
1297 		__builtin_unreachable();
1298 	}
1299 
1300 	if (m->m_flags & M_BCAST) {
1301 		__packet_set_link_broadcast(ph);
1302 	} else if (m->m_flags & M_MCAST) {
1303 		__packet_set_link_multicast(ph);
1304 	}
1305 
1306 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1307 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1308 	    (t == NR_RX) ? "RX" : "TX",
1309 	    sk_dump("buf", baddr, len, 128, NULL, 0));
1310 }
1311 
1312 /*
1313  * This routine is used for copying from a packet originating from a native
1314  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1315  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1316  *
1317  * Note that this routine does not alter m_data pointer of the mbuf, as the
1318  * caller may want to use the original value upon return.  We do, however,
1319  * adjust the length to reflect the total data span.
1320  *
1321  * This routine supports copying into an mbuf chain for RX but not TX.
1322  *
1323  * start/stuff is relative to poff, within [0, len], such that
1324  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1325  */
1326 void
pkt_copy_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1327 pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1328     struct mbuf *m, const uint16_t moff, const uint32_t len,
1329     const boolean_t copysum, const uint16_t start)
1330 {
1331 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1332 	struct mbuf *curr_m;
1333 	uint32_t partial = 0;
1334 	uint32_t remaining_len = len, copied_len = 0;
1335 	uint16_t csum = 0;
1336 	uint8_t *baddr;
1337 	uint8_t *dp;
1338 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1339 
1340 	ASSERT(len >= start);
1341 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1342 
1343 	/* get buffer address from packet */
1344 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1345 	ASSERT(baddr != NULL);
1346 	baddr += poff;
1347 	VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1348 
1349 	ASSERT((m->m_flags & M_PKTHDR));
1350 	m->m_data += moff;
1351 
1352 	switch (t) {
1353 	case NR_RX:
1354 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1355 
1356 		/*
1357 		 * Use pkt_copy() to copy the portion up to the
1358 		 * point where we need to start the checksum, and
1359 		 * copy the remainder, checksumming as we go.
1360 		 */
1361 		if (__probable(do_sum && start != 0)) {
1362 			ASSERT(M_TRAILINGSPACE(m) >= start);
1363 			ASSERT(m->m_len == 0);
1364 			dp = (uint8_t *)m->m_data;
1365 			_pkt_copy(baddr, dp, start);
1366 			remaining_len -= start;
1367 			copied_len += start;
1368 			m->m_len += start;
1369 			m->m_pkthdr.len += start;
1370 		}
1371 		curr_m = m;
1372 		while (curr_m != NULL && remaining_len != 0) {
1373 			uint32_t tmp_len = MIN(remaining_len,
1374 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1375 			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
1376 			if (__probable(do_sum)) {
1377 				partial = __packet_copy_and_sum((baddr + copied_len),
1378 				    dp, tmp_len, partial);
1379 			} else {
1380 				_pkt_copy((baddr + copied_len), dp, tmp_len);
1381 			}
1382 
1383 			curr_m->m_len += tmp_len;
1384 			m->m_pkthdr.len += tmp_len;
1385 			copied_len += tmp_len;
1386 			remaining_len -= tmp_len;
1387 			curr_m = curr_m->m_next;
1388 		}
1389 		ASSERT(remaining_len == 0);
1390 
1391 		if (__probable(do_sum)) {
1392 			csum = __packet_fold_sum(partial);
1393 
1394 			m->m_pkthdr.csum_flags |=
1395 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1396 			m->m_pkthdr.csum_rx_start = start;
1397 			m->m_pkthdr.csum_rx_val = csum;
1398 		} else {
1399 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1400 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1401 			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1402 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1403 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1404 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1405 			}
1406 		}
1407 
1408 		/* translate packet metadata */
1409 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1410 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1411 
1412 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1413 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1414 		    sk_proc_name_address(current_proc()),
1415 		    sk_proc_pid(current_proc()), len,
1416 		    (copysum ? (len - start) : 0), csum, start);
1417 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1418 		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1419 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1420 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1421 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1422 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1423 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1424 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1425 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1426 		    (uint32_t)pkt->pkt_csum_rx_value);
1427 		break;
1428 
1429 	case NR_TX:
1430 		dp = (uint8_t *)m->m_data;
1431 		ASSERT(m->m_next == NULL);
1432 
1433 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1434 		    (uint32_t)mbuf_maxlen(m));
1435 		m->m_len += len;
1436 		m->m_pkthdr.len += len;
1437 		VERIFY(m->m_len == m->m_pkthdr.len &&
1438 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1439 
1440 		if (__probable(copysum)) {
1441 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1442 			/*
1443 			 * Use pkt_copy() to copy the portion up to the
1444 			 * point where we need to start the checksum, and
1445 			 * copy the remainder, checksumming as we go.
1446 			 */
1447 			if (__probable(start != 0)) {
1448 				_pkt_copy(baddr, dp, start);
1449 			}
1450 			partial = __packet_copy_and_sum((baddr + start),
1451 			    (dp + start), (len - start), 0);
1452 			csum = __packet_fold_sum_final(partial);
1453 
1454 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1455 			if (csum == 0 &&
1456 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1457 				csum = 0xffff;
1458 			}
1459 
1460 			/* Insert checksum into packet */
1461 			ASSERT(stuff <= (len - sizeof(csum)));
1462 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1463 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1464 			} else {
1465 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1466 			}
1467 		} else {
1468 			_pkt_copy(baddr, dp, len);
1469 		}
1470 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1471 		m->m_pkthdr.csum_tx_start = 0;
1472 		m->m_pkthdr.csum_tx_stuff = 0;
1473 
1474 		/* translate packet metadata */
1475 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1476 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1477 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1478 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1479 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1480 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1481 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1482 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1483 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1484 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1485 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1486 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1487 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1488 		}
1489 
1490 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1491 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1492 		    sk_proc_name_address(current_proc()),
1493 		    sk_proc_pid(current_proc()), len,
1494 		    (copysum ? (len - start) : 0), csum, start);
1495 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1496 		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1497 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1498 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1499 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1500 		break;
1501 
1502 	default:
1503 		VERIFY(0);
1504 		/* NOTREACHED */
1505 		__builtin_unreachable();
1506 	}
1507 
1508 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1509 		m->m_flags |= M_BCAST;
1510 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1511 		m->m_flags |= M_MCAST;
1512 	}
1513 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1514 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1515 	    (t == NR_RX) ? "RX" : "TX",
1516 	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1517 }
1518 
1519 /*
1520  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1521  * NOTE: poff is the offset within the packet.
1522  *
1523  * This routine supports copying into an mbuf chain for RX but not TX.
1524  *
1525  * start/stuff is relative to poff, within [0, len], such that
1526  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1527  */
1528 void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)1529 pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1530     const uint16_t poff, struct mbuf *m, const uint16_t moff,
1531     const uint32_t len, const boolean_t copysum, const uint16_t start)
1532 {
1533 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1534 	struct mbuf *curr_m;
1535 	uint32_t partial = 0;
1536 	uint32_t remaining_len = len, copied_len = 0;
1537 	uint16_t csum = 0;
1538 	uint8_t *baddr;
1539 	uint8_t *dp;
1540 	boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1541 
1542 	ASSERT(len >= start);
1543 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
1544 
1545 	/* get buffer address from packet */
1546 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1547 	ASSERT(baddr != NULL);
1548 	baddr += poff;
1549 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1550 	    __packet_get_buflet_count(ph)));
1551 
1552 	ASSERT((m->m_flags & M_PKTHDR));
1553 	m->m_data += moff;
1554 
1555 	switch (t) {
1556 	case NR_RX:
1557 		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1558 		if (__probable(do_sum && start != 0)) {
1559 			ASSERT(M_TRAILINGSPACE(m) >= start);
1560 			ASSERT(m->m_len == 0);
1561 			dp = (uint8_t *)m->m_data;
1562 			_pkt_copy(baddr, dp, start);
1563 			remaining_len -= start;
1564 			copied_len += start;
1565 			m->m_len += start;
1566 			m->m_pkthdr.len += start;
1567 		}
1568 		curr_m = m;
1569 		while (curr_m != NULL && remaining_len != 0) {
1570 			uint32_t tmp_len = MIN(remaining_len,
1571 			    (uint32_t)M_TRAILINGSPACE(curr_m));
1572 			uint16_t soff = poff + (uint16_t)copied_len;
1573 			dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
1574 
1575 			if (__probable(do_sum)) {
1576 				partial = _pkt_copyaddr_sum(ph, soff,
1577 				    dp, tmp_len, TRUE, partial, NULL);
1578 			} else {
1579 				pkt_copyaddr_sum(ph, soff,
1580 				    dp, tmp_len, FALSE, 0, NULL);
1581 			}
1582 
1583 			curr_m->m_len += tmp_len;
1584 			m->m_pkthdr.len += tmp_len;
1585 			copied_len += tmp_len;
1586 			remaining_len -= tmp_len;
1587 			curr_m = curr_m->m_next;
1588 		}
1589 		ASSERT(remaining_len == 0);
1590 
1591 		if (__probable(do_sum)) {
1592 			csum = __packet_fold_sum(partial);
1593 
1594 			m->m_pkthdr.csum_flags |=
1595 			    (CSUM_DATA_VALID | CSUM_PARTIAL);
1596 			m->m_pkthdr.csum_rx_start = start;
1597 			m->m_pkthdr.csum_rx_val = csum;
1598 		} else {
1599 			m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1600 			m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1601 			_CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1602 			m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1603 			if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1604 				m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1605 			}
1606 		}
1607 
1608 		/* translate packet metadata */
1609 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1610 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1611 
1612 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1613 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1614 		    sk_proc_name_address(current_proc()),
1615 		    sk_proc_pid(current_proc()), len,
1616 		    (copysum ? (len - start) : 0), csum, start);
1617 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1618 		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1619 		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1620 		    (uint32_t)m->m_pkthdr.csum_rx_start,
1621 		    (uint32_t)m->m_pkthdr.csum_rx_val);
1622 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1623 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1624 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1625 		    (uint32_t)pkt->pkt_csum_rx_start_off,
1626 		    (uint32_t)pkt->pkt_csum_rx_value);
1627 		break;
1628 	case NR_TX:
1629 		dp = (uint8_t *)m->m_data;
1630 		ASSERT(m->m_next == NULL);
1631 		VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1632 		    (uint32_t)mbuf_maxlen(m));
1633 		m->m_len += len;
1634 		m->m_pkthdr.len += len;
1635 		VERIFY(m->m_len == m->m_pkthdr.len &&
1636 		    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1637 		if (__probable(copysum)) {
1638 			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1639 			/*
1640 			 * Use pkt_copy() to copy the portion up to the
1641 			 * point where we need to start the checksum, and
1642 			 * copy the remainder, checksumming as we go.
1643 			 */
1644 			if (__probable(start != 0)) {
1645 				_pkt_copy(baddr, dp, start);
1646 			}
1647 			partial = _pkt_copyaddr_sum(ph, (poff + start),
1648 			    (dp + start), (len - start), TRUE, 0, NULL);
1649 			csum = __packet_fold_sum_final(partial);
1650 
1651 			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1652 			if (csum == 0 &&
1653 			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1654 				csum = 0xffff;
1655 			}
1656 
1657 			/* Insert checksum into packet */
1658 			ASSERT(stuff <= (len - sizeof(csum)));
1659 			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1660 				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1661 			} else {
1662 				bcopy((void *)&csum, dp + stuff, sizeof(csum));
1663 			}
1664 		} else {
1665 			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
1666 		}
1667 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1668 		m->m_pkthdr.csum_tx_start = 0;
1669 		m->m_pkthdr.csum_tx_stuff = 0;
1670 
1671 		/* translate packet metadata */
1672 		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1673 		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1674 		m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1675 		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1676 		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1677 		m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1678 		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1679 		mbuf_set_timestamp(m, pkt->pkt_timestamp,
1680 		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1681 		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1682 		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1683 		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1684 			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1685 		}
1686 
1687 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1688 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1689 		    sk_proc_name_address(current_proc()),
1690 		    sk_proc_pid(current_proc()), len,
1691 		    (copysum ? (len - start) : 0), csum, start);
1692 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1693 		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1694 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1695 		    (uint32_t)pkt->pkt_csum_tx_start_off,
1696 		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
1697 		break;
1698 
1699 	default:
1700 		VERIFY(0);
1701 		/* NOTREACHED */
1702 		__builtin_unreachable();
1703 	}
1704 
1705 	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1706 		m->m_flags |= M_BCAST;
1707 	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1708 		m->m_flags |= M_MCAST;
1709 	}
1710 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1711 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1712 	    (t == NR_RX) ? "RX" : "TX",
1713 	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1714 }
1715 
1716 /*
1717  * Like m_copydata(), but computes 16-bit sum as the data is copied.
1718  * Caller can provide an initial sum to be folded into the computed
1719  * sum.  The accumulated partial sum (32-bit) is returned to caller;
1720  * caller is responsible for further reducing it to 16-bit if needed,
1721  * as well as to perform the final 1's complement on it.
1722  */
1723 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1724 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1725     boolean_t *odd_start)
1726 {
1727 	boolean_t needs_swap, started_on_odd = FALSE;
1728 	int off0 = off, len0 = len;
1729 	struct mbuf *m0 = m;
1730 	uint64_t sum, partial;
1731 	unsigned count, odd;
1732 	char *cp = vp;
1733 
1734 	if (__improbable(off < 0 || len < 0)) {
1735 		panic("%s: invalid offset %d or len %d", __func__, off, len);
1736 		/* NOTREACHED */
1737 		__builtin_unreachable();
1738 	}
1739 
1740 	while (off > 0) {
1741 		if (__improbable(m == NULL)) {
1742 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1743 			    __func__, m0, off0, len0);
1744 			/* NOTREACHED */
1745 			__builtin_unreachable();
1746 		}
1747 		if (off < m->m_len) {
1748 			break;
1749 		}
1750 		off -= m->m_len;
1751 		m = m->m_next;
1752 	}
1753 
1754 	if (odd_start) {
1755 		started_on_odd = *odd_start;
1756 	}
1757 	sum = initial_sum;
1758 
1759 	for (; len > 0; m = m->m_next) {
1760 		uint8_t *datap;
1761 
1762 		if (__improbable(m == NULL)) {
1763 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1764 			    __func__, m0, off0, len0);
1765 			/* NOTREACHED */
1766 			__builtin_unreachable();
1767 		}
1768 
1769 		datap = mtod(m, uint8_t *) + off;
1770 		count = m->m_len;
1771 
1772 		if (__improbable(count == 0)) {
1773 			continue;
1774 		}
1775 
1776 		count = MIN(count - off, (unsigned)len);
1777 		partial = 0;
1778 
1779 		if ((uintptr_t)datap & 1) {
1780 			/* Align on word boundary */
1781 			started_on_odd = !started_on_odd;
1782 #if BYTE_ORDER == LITTLE_ENDIAN
1783 			partial = *datap << 8;
1784 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1785 			partial = *datap;
1786 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1787 			*cp++ = *datap++;
1788 			count -= 1;
1789 			len -= 1;
1790 		}
1791 
1792 		needs_swap = started_on_odd;
1793 		odd = count & 1u;
1794 		count -= odd;
1795 
1796 		if (count) {
1797 			partial = __packet_copy_and_sum(datap,
1798 			    cp, count, (uint32_t)partial);
1799 			datap += count;
1800 			cp += count;
1801 			len -= count;
1802 			if (__improbable((partial & (3ULL << 62)) != 0)) {
1803 				if (needs_swap) {
1804 					partial = (partial << 8) +
1805 					    (partial >> 56);
1806 				}
1807 				sum += (partial >> 32);
1808 				sum += (partial & 0xffffffff);
1809 				partial = 0;
1810 			}
1811 		}
1812 
1813 		if (odd) {
1814 #if BYTE_ORDER == LITTLE_ENDIAN
1815 			partial += *datap;
1816 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1817 			partial += *datap << 8;
1818 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1819 			*cp++ = *datap++;
1820 			len -= 1;
1821 			started_on_odd = !started_on_odd;
1822 		}
1823 		off = 0;
1824 
1825 		if (needs_swap) {
1826 			partial = (partial << 8) + (partial >> 24);
1827 		}
1828 		sum += (partial >> 32) + (partial & 0xffffffff);
1829 		/*
1830 		 * Reduce sum to allow potential byte swap
1831 		 * in the next iteration without carry.
1832 		 */
1833 		sum = (sum >> 32) + (sum & 0xffffffff);
1834 	}
1835 
1836 	if (odd_start) {
1837 		*odd_start = started_on_odd;
1838 	}
1839 
1840 	/* Final fold (reduce 64-bit to 32-bit) */
1841 	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1842 	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */
1843 
1844 	/* return 32-bit partial sum to caller */
1845 	return (uint32_t)sum;
1846 }
1847 
1848 #if DEBUG || DEVELOPMENT
1849 #define TRAILERS_MAX    16              /* max trailing bytes */
1850 #define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
1851 static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
1852 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1853 
1854 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1855 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1856 {
1857 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1858 	uint32_t extra;
1859 	uint8_t *baddr;
1860 
1861 	/* get buffer address from packet */
1862 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1863 	ASSERT(baddr != NULL);
1864 	ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1865 
1866 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1867 	if (extra == 0 || extra > sizeof(tb) ||
1868 	    (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1869 		return 0;
1870 	}
1871 
1872 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1873 	if (regen++ == TRAILERS_REGEN) {
1874 		read_frandom(&tb[0], sizeof(tb));
1875 		regen = 0;
1876 	}
1877 
1878 	bcopy(&tb[0], (baddr + len), extra);
1879 
1880 	/* recompute partial sum (also to exercise related logic) */
1881 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1882 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1883 	    ((len + extra) - start), 0);
1884 	pkt->pkt_csum_rx_start_off = start;
1885 
1886 	return extra;
1887 }
1888 
1889 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1890 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1891 {
1892 	uint32_t extra;
1893 
1894 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1895 	if (extra == 0 || extra > sizeof(tb)) {
1896 		return 0;
1897 	}
1898 
1899 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1900 		return 0;
1901 	}
1902 
1903 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1904 	if (regen++ == TRAILERS_REGEN) {
1905 		read_frandom(&tb[0], sizeof(tb));
1906 		regen = 0;
1907 	}
1908 
1909 	/* recompute partial sum (also to exercise related logic) */
1910 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1911 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1912 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1913 	m->m_pkthdr.csum_rx_start = start;
1914 
1915 	return extra;
1916 }
1917 #endif /* DEBUG || DEVELOPMENT */
1918 
1919 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)1920 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
1921     uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
1922 {
1923 	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
1924 }
1925 
1926 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1927 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1928     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1929 {
1930 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1931 }
1932 
1933 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1934 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1935     uint16_t len, boolean_t do_cscum)
1936 {
1937 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1938 }
1939 
1940 void
pkt_copy(void * src,void * dst,size_t len)1941 pkt_copy(void *src, void *dst, size_t len)
1942 {
1943 	return _pkt_copy(src, dst, len);
1944 }
1945