xref: /xnu-8020.140.41/bsd/skywalk/packet/packet_copy.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2017-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
#if (DEVELOPMENT || DEBUG)
/* sysctl subtree: kern.skywalk.packet (development/debug kernels only) */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
int pkt_trailers = 0; /* for testing trailing bytes */
/* kern.skywalk.packet.trailers: read-write knob backing pkt_trailers */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
40 
41 
/*
 * Copy "len" bytes from "src" to "dst".  When both pointers are 8-byte
 * aligned, dispatch to the specialized sk_copy64_* helpers (exact-size
 * variants for the common IPv4/IPv6 header lengths, then multiple-of-
 * 64/32/8/4 bulk variants); anything else falls back to bcopy().
 */
__attribute__((always_inline))
static inline void
_pkt_copy(void *src, void *dst, size_t len)
{
	/* unaligned buffers take the generic path */
	if (!(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
		bcopy(src, dst, len);
		return;
	}

	if (len == 20) {                        /* standard IPv4 header */
		sk_copy64_20(src, dst);
	} else if (len == 40) {                 /* IPv6 header */
		sk_copy64_40(src, dst);
	} else if (IS_P2ALIGNED(len, 64)) {     /* multiple of 64 bytes */
		sk_copy64_64x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 32)) {     /* multiple of 32 bytes */
		sk_copy64_32x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 8)) {      /* multiple of 8 bytes */
		sk_copy64_8x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 4)) {      /* multiple of 4 bytes */
		sk_copy64_4x(src, dst, len);
	} else {
		bcopy(src, dst, len);
	}
}
75 
76 /*
77  * This routine is used for copying data across two kernel packets.
78  * Can also optionally compute 16-bit partial inet checksum as the
79  * data is copied.
80  * This routine is used by flowswitch while copying packet from vp
81  * adapter pool to packet in native netif pool and vice-a-versa.
82  *
83  * start/stuff is relative to soff, within [0, len], such that
84  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85  */
void
pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
    kern_packet_t sph, const uint16_t soff, const uint32_t len,
    const boolean_t copysum, const uint16_t start, const uint16_t stuff,
    const boolean_t invert)
{
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	uint32_t partial;       /* 32-bit running sum prior to folding */
	uint16_t csum = 0;
	uint8_t *sbaddr, *dbaddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
	ASSERT(sbaddr != NULL);
	sbaddr += soff;
	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
	ASSERT(dbaddr != NULL);
	dbaddr += doff;
	/* single-buflet routine: copy must fit in the first dst buflet */
	VERIFY((doff + len) <= dpkt->pkt_qum.qum_pp->pp_buflet_size);

	switch (t) {
	case NR_RX:
		/*
		 * RX: leave a PACKET_CSUM_PARTIAL result in the metadata so
		 * the stack can finish validation without re-reading payload.
		 */
		dpkt->pkt_csum_flags = 0;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
			/* no checksum computed; clear RX csum metadata */
			dpkt->pkt_csum_rx_start_off = 0;
			dpkt->pkt_csum_rx_value = 0;
		}

		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY | SK_VERB_RX,
		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
		    (uint32_t)dpkt->pkt_csum_rx_start_off,
		    (uint32_t)dpkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(sbaddr, dbaddr, start);
			}
			partial = __packet_copy_and_sum((sbaddr + start),
			    (dbaddr + start), (len - start), 0);
			/* TX: fold all the way to the final 16-bit value */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: avoid misaligned store */
				bcopy((void *)&csum, dbaddr + stuff,
				    sizeof(csum));
			}
		} else {
			_pkt_copy(sbaddr, dbaddr, len);
		}
		/* checksum (if any) is fully resolved; clear TX offload state */
		dpkt->pkt_csum_flags = 0;
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;

		SK_DF(SK_VERB_COPY | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* account the copied bytes in the destination metadata */
	METADATA_ADJUST_LEN(dpkt, len, doff);

	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
}
198 
199 /*
200  * NOTE: soff is the offset within the packet
201  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
202  * caller is responsible for further reducing it to 16-bit if needed,
203  * as well as to perform the final 1's complement on it.
204  */
uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
{
	uint8_t odd = 0;
	uint8_t *sbaddr = NULL;
	uint32_t sum = initial_sum, partial;
	uint32_t len0 = len;    /* original length, kept for panic diagnostics */
	boolean_t needs_swap, started_on_odd = FALSE;
	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
	kern_buflet_t sbuf = NULL, sbufp = NULL;

	sbcnt = __packet_get_buflet_count(sph);

	/* carry in the odd-byte phase from a previous segment, if any */
	if (odd_start) {
		started_on_odd = *odd_start;
	}

	/* fastpath (copy+sum, single buflet, even aligned, even length) */
	if (do_csum && sbcnt == 1 && len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
		ASSERT(sbuf != NULL);
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		/* soff must fall inside this buflet's data span */
		ASSERT(sboff <= soff);
		ASSERT(soff < sboff + sblen);
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);

		clen = (uint16_t)MIN(len, sblen);

		/* even address and even length: no phase bookkeeping needed */
		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
			return __packet_fold_sum(sum);
		}

		/* fall through to the generic path */
		sbaddr = NULL;
		sbuf = sbufp = NULL;
	}

	/* slowpath: walk the source buflet chain */
	while (len != 0) {
		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
		if (__improbable(sbuf == NULL)) {
			panic("%s: bad packet, 0x%llx [off %d, len %d]",
			    __func__, SK_KVA(spkt), off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		sbufp = sbuf;
		sboff = __buflet_get_data_offset(sbuf);
		sblen = __buflet_get_data_length(sbuf);
		ASSERT((sboff <= soff) && (soff < sboff + sblen));
		sblen -= (soff - sboff);
		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
		soff = 0;       /* offset applies to the first buflet only */
		clen = (uint16_t)MIN(len, sblen);
		if (__probable(do_csum)) {
			partial = 0;
			if (__improbable((uintptr_t)sbaddr & 1)) {
				/* Align on word boundary */
				started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
				partial = (uint8_t)*sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial = (uint8_t)*sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				sblen -= 1;
				clen -= 1;
				len -= 1;
			}
			/*
			 * If this chunk starts at an odd phase, its 16-bit
			 * words are byte-swapped relative to the running sum
			 * and must be swapped back before accumulating.
			 */
			needs_swap = started_on_odd;

			/* sum an even number of bytes; keep the odd tail */
			odd = clen & 1u;
			clen -= odd;

			if (clen != 0) {
				partial = __packet_copy_and_sum(sbaddr, dbaddr,
				    clen, partial);
			}

			/* flush before the <<8 swap below could overflow */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		} else {
			/* copy-only mode */
			_pkt_copy(sbaddr, dbaddr, clen);
		}

		dbaddr += clen;
		sbaddr += clen;

		if (__probable(do_csum)) {
			if (odd != 0) {
				/* fold in the trailing odd byte; flip phase */
#if BYTE_ORDER == LITTLE_ENDIAN
				partial += (uint8_t)*sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
				partial += (uint8_t)*sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
				*dbaddr++ = *sbaddr++;
				started_on_odd = !started_on_odd;
			}

			if (needs_swap) {
				partial = (partial << 8) + (partial >> 24);
			}
			sum += (partial >> 16) + (partial & 0xffff);
			/*
			 * Reduce sum to allow potential byte swap
			 * in the next iteration without carry.
			 */
			sum = (sum >> 16) + (sum & 0xffff);
		}

		sblen -= clen + odd;
		len -= clen + odd;
		/* either this buflet or the request is exhausted */
		ASSERT(sblen == 0 || len == 0);
	}

	/* return the phase to the caller for checksum continuation */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	if (__probable(do_csum)) {
		/* Final fold (reduce 32-bit to 16-bit) */
		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return sum;
}
341 
342 /*
343  * NOTE: Caller of this function is responsible to adjust the length and offset
344  * of the first buflet of the destination packet if (doff != 0),
345  * i.e. additional data is being prependend to the packet.
346  * It should also finalize the packet.
347  * To simplify & optimize the routine, we have also assumed that soff & doff
348  * will lie within the first buffer, which is true for the current use cases
349  * where, doff is the offset of the checksum field in the TCP/IP header and
350  * soff is the L3 offset.
351  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
352  * caller is responsible for further reducing it to 16-bit if needed,
353  * as well as to perform the final 1's complement on it.
354  */
355 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)356 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
357     uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
358 {
359 	uint8_t odd = 0;
360 	uint32_t sum = 0, partial;
361 	boolean_t needs_swap, started_on_odd = FALSE;
362 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
363 	uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
364 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
365 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
366 	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
367 
368 	ASSERT(csum_partial != NULL || !do_csum);
369 	sbcnt = __packet_get_buflet_count(sph);
370 	dbcnt = __packet_get_buflet_count(dph);
371 
372 	while (len != 0) {
373 		ASSERT(sbaddr == NULL || dbaddr == NULL);
374 		if (sbaddr == NULL) {
375 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
376 			if (__improbable(sbuf == NULL)) {
377 				break;
378 			}
379 			sbufp = sbuf;
380 			sblen = __buflet_get_data_length(sbuf);
381 			sboff = __buflet_get_data_offset(sbuf);
382 			ASSERT(soff >= sboff);
383 			ASSERT(sboff + sblen > soff);
384 			sblen -= (soff - sboff);
385 			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
386 			soff = 0;
387 		}
388 
389 		if (dbaddr == NULL) {
390 			if (dbufp != NULL) {
391 				__buflet_set_data_length(dbufp, dlen0);
392 			}
393 
394 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
395 			if (__improbable(dbuf == NULL)) {
396 				break;
397 			}
398 			dbufp = dbuf;
399 			dlim = __buflet_get_data_limit(dbuf);
400 			ASSERT(dlim > doff);
401 			dlim -= doff;
402 			if (doff != 0) {
403 				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
404 			}
405 			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
406 			dlen0 = dlim;
407 			doff = 0;
408 		}
409 
410 		clen = (uint16_t)MIN(len, sblen);
411 		clen = MIN(clen, dlim);
412 
413 		if (__probable(do_csum)) {
414 			partial = 0;
415 			if (__improbable((uintptr_t)sbaddr & 1)) {
416 				/* Align on word boundary */
417 				started_on_odd = !started_on_odd;
418 #if BYTE_ORDER == LITTLE_ENDIAN
419 				partial = (uint8_t)*sbaddr << 8;
420 #else /* BYTE_ORDER != LITTLE_ENDIAN */
421 				partial = (uint8_t)*sbaddr;
422 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
423 				*dbaddr++ = *sbaddr++;
424 				clen -= 1;
425 				dlim -= 1;
426 				len -= 1;
427 			}
428 			needs_swap = started_on_odd;
429 
430 			odd = clen & 1u;
431 			clen -= odd;
432 
433 			if (clen != 0) {
434 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
435 				    clen, partial);
436 			}
437 
438 			if (__improbable(partial & 0xc0000000)) {
439 				if (needs_swap) {
440 					partial = (partial << 8) +
441 					    (partial >> 24);
442 				}
443 				sum += (partial >> 16);
444 				sum += (partial & 0xffff);
445 				partial = 0;
446 			}
447 		} else {
448 			_pkt_copy(sbaddr, dbaddr, clen);
449 		}
450 		sbaddr += clen;
451 		dbaddr += clen;
452 
453 		if (__probable(do_csum)) {
454 			if (odd != 0) {
455 #if BYTE_ORDER == LITTLE_ENDIAN
456 				partial += (uint8_t)*sbaddr;
457 #else /* BYTE_ORDER != LITTLE_ENDIAN */
458 				partial += (uint8_t)*sbaddr << 8;
459 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
460 				*dbaddr++ = *sbaddr++;
461 				started_on_odd = !started_on_odd;
462 			}
463 
464 			if (needs_swap) {
465 				partial = (partial << 8) + (partial >> 24);
466 			}
467 			sum += (partial >> 16) + (partial & 0xffff);
468 			/*
469 			 * Reduce sum to allow potential byte swap
470 			 * in the next iteration without carry.
471 			 */
472 			sum = (sum >> 16) + (sum & 0xffff);
473 		}
474 
475 		sblen -= clen + odd;
476 		dlim -= clen + odd;
477 		len -= clen + odd;
478 
479 		if (sblen == 0) {
480 			sbaddr = NULL;
481 		}
482 
483 		if (dlim == 0) {
484 			dbaddr = NULL;
485 		}
486 	}
487 
488 	if (__probable(dbuf != NULL)) {
489 		__buflet_set_data_length(dbuf, (dlen0 - dlim));
490 	}
491 	if (__probable(do_csum)) {
492 		/* Final fold (reduce 32-bit to 16-bit) */
493 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
494 		sum = (sum >> 16) + (sum & 0xffff);
495 		*csum_partial = (uint32_t)sum;
496 	}
497 	return len == 0;
498 }
499 
500 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)501 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
502 {
503 	uint8_t odd = 0;
504 	uint32_t sum = 0, partial;
505 	boolean_t needs_swap, started_on_odd = FALSE;
506 	uint8_t *sbaddr = NULL;
507 	uint16_t clen, sblen, sbcnt, sboff;
508 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
509 	kern_buflet_t sbuf = NULL, sbufp = NULL;
510 
511 	sbcnt = __packet_get_buflet_count(sph);
512 
513 	/* fastpath (single buflet, even aligned, even length) */
514 	if (sbcnt == 1 && len != 0) {
515 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
516 		ASSERT(sbuf != NULL);
517 		sblen = __buflet_get_data_length(sbuf);
518 		sboff = __buflet_get_data_offset(sbuf);
519 		ASSERT(soff >= sboff);
520 		ASSERT(sboff + sblen > soff);
521 		sblen -= (soff - sboff);
522 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
523 
524 		clen = MIN(len, sblen);
525 
526 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
527 			sum = __packet_cksum(sbaddr, clen, 0);
528 			return __packet_fold_sum(sum);
529 		}
530 
531 		sbaddr = NULL;
532 		sbuf = sbufp = NULL;
533 	}
534 
535 	/* slowpath */
536 	while (len != 0) {
537 		ASSERT(sbaddr == NULL);
538 		if (sbaddr == NULL) {
539 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
540 			if (__improbable(sbuf == NULL)) {
541 				break;
542 			}
543 			sbufp = sbuf;
544 			sblen = __buflet_get_data_length(sbuf);
545 			sboff = __buflet_get_data_offset(sbuf);
546 			ASSERT(soff >= sboff);
547 			ASSERT(sboff + sblen > soff);
548 			sblen -= (soff - sboff);
549 			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
550 			soff = 0;
551 		}
552 
553 		clen = MIN(len, sblen);
554 
555 		partial = 0;
556 		if (__improbable((uintptr_t)sbaddr & 1)) {
557 			/* Align on word boundary */
558 			started_on_odd = !started_on_odd;
559 #if BYTE_ORDER == LITTLE_ENDIAN
560 			partial = (uint8_t)*sbaddr << 8;
561 #else /* BYTE_ORDER != LITTLE_ENDIAN */
562 			partial = (uint8_t)*sbaddr;
563 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
564 			clen -= 1;
565 			len -= 1;
566 		}
567 		needs_swap = started_on_odd;
568 
569 		odd = clen & 1u;
570 		clen -= odd;
571 
572 		if (clen != 0) {
573 			partial = __packet_cksum(sbaddr,
574 			    clen, partial);
575 		}
576 
577 		if (__improbable(partial & 0xc0000000)) {
578 			if (needs_swap) {
579 				partial = (partial << 8) +
580 				    (partial >> 24);
581 			}
582 			sum += (partial >> 16);
583 			sum += (partial & 0xffff);
584 			partial = 0;
585 		}
586 		sbaddr += clen;
587 
588 		if (odd != 0) {
589 #if BYTE_ORDER == LITTLE_ENDIAN
590 			partial += (uint8_t)*sbaddr;
591 #else /* BYTE_ORDER != LITTLE_ENDIAN */
592 			partial += (uint8_t)*sbaddr << 8;
593 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
594 			started_on_odd = !started_on_odd;
595 		}
596 
597 		if (needs_swap) {
598 			partial = (partial << 8) + (partial >> 24);
599 		}
600 		sum += (partial >> 16) + (partial & 0xffff);
601 		/*
602 		 * Reduce sum to allow potential byte swap
603 		 * in the next iteration without carry.
604 		 */
605 		sum = (sum >> 16) + (sum & 0xffff);
606 
607 		sblen -= clen + odd;
608 		len -= clen + odd;
609 
610 		if (sblen == 0) {
611 			sbaddr = NULL;
612 		}
613 	}
614 
615 	/* Final fold (reduce 32-bit to 16-bit) */
616 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
617 	sum = (sum >> 16) + (sum & 0xffff);
618 	return (uint32_t)sum;
619 }
620 
621 
622 /*
623  * This is a multi-buflet variant of pkt_copy_from_pkt().
624  *
625  * start/stuff is relative to soff, within [0, len], such that
626  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
627  */
void
pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t copysum, const uint16_t start,
    const uint16_t stuff, const boolean_t invert)
{
	boolean_t rc;
	uint32_t partial;
	uint16_t csum = 0;
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);

	/* destination must have room across all of its buflets */
	VERIFY((doff + len) <=
	    (dpkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(dph)));

	switch (t) {
	case NR_RX:
		dpkt->pkt_csum_flags = 0;
		if (__probable(copysum)) {
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			_pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * NOTE(review): only "start" (not "len") is adjusted
			 * here — presumably because _pkt_copypkt_sum() sets
			 * the buflet data lengths for the portion it copies;
			 * confirm against METADATA_ADJUST_LEN semantics.
			 */
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
			/* no checksum computed; clear RX csum metadata */
			dpkt->pkt_csum_rx_start_off = 0;
			dpkt->pkt_csum_rx_value = 0;
		}
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint8_t *baddr;
			/*
			 * copy the portion up to the point where we need to
			 * start the checksum, and copy the remainder,
			 * checksumming as we go.
			 */
			if (__probable(start != 0)) {
				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
				    start, NULL, FALSE);
				ASSERT(rc);
			}
			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
			    (doff + start), (len - start), &partial, TRUE);
			ASSERT(rc);
			/* TX: fold all the way to the final 16-bit value */
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
			if (csum == 0 && invert) {
				csum = 0xffff;
			}

			/*
			 * Insert checksum into packet.
			 * Here we assume that checksum will be in the
			 * first buffer.
			 */
			ASSERT((stuff + doff + sizeof(csum)) <=
			    dpkt->pkt_qum.qum_pp->pp_buflet_size);
			ASSERT(stuff <= (len - sizeof(csum)));

			/* get first buflet buffer address from packet */
			MD_BUFLET_ADDR_ABS(dpkt, baddr);
			ASSERT(baddr != NULL);
			baddr += doff;
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: avoid misaligned store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(dpkt, start, doff);
		} else {
			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
			    FALSE);
			ASSERT(rc);
		}
		/* checksum (if any) is fully resolved; clear TX offload state */
		dpkt->pkt_csum_flags = 0;
		dpkt->pkt_csum_tx_start_off = 0;
		dpkt->pkt_csum_tx_stuff_off = 0;
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
731 
732 /*
733  * This routine is used for copying an mbuf which originated in the host
734  * stack destined to a native skywalk interface (NR_TX), as well as for
735  * mbufs originating on compat network interfaces (NR_RX).
736  *
737  * start/stuff is relative to moff, within [0, len], such that
738  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
739  */
void
pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet routine: copy must fit in the first buflet */
	VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);

	switch (t) {
	case NR_RX:
		/* propagate the mbuf's RX checksum state into the packet */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/* recompute only if the driver did not fully validate */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			/*
			 * NOTE(review): the plain copy above reads from mbuf
			 * offset "moff" while the checksummed copy below
			 * reads from offset "start"; these agree only when
			 * moff == 0 — confirm with callers (same pattern in
			 * the NR_TX branch below).
			 */
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			csum = __packet_fold_sum(partial);

			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
		} else {
			m_copydata(m, moff, len, baddr);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copydata_sum(m, start, (len - start),
			    (baddr + start), 0, NULL);
			/* TX: fold all the way to the final 16-bit value */
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: avoid misaligned store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
		} else {
			m_copydata(m, moff, len, baddr);
		}
		/* checksum (if any) is fully resolved; clear TX offload state */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		/* replace the common flag bits with the mbuf's */
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		/* carry over NECP policy attribution */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* account the copied bytes in the destination metadata */
	METADATA_ADJUST_LEN(pkt, len, poff);

	/* preserve link-layer cast classification */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
918 
919 /*
920  * Like m_copydata_sum(), but works on a destination kernel packet.
921  */
/*
 * Copy `len' bytes from mbuf chain `m' starting at offset `soff' into the
 * (possibly multi-buflet) destination packet `dph' starting at offset `doff'
 * within its first buflet.  When `do_cscum' is TRUE, a 16-bit one's-complement
 * sum is accumulated over the copied bytes and the folded result is returned;
 * otherwise 0 is returned.  Panics if the chain is shorter than soff+len or
 * the destination runs out of buflets.
 */
static inline uint32_t
m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
    uint32_t len, boolean_t do_cscum)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = soff;        /* original offset, kept for panic messages */
	uint32_t len0 = len;    /* original length, kept for panic messages */
	struct mbuf *m0 = m;    /* head of chain, kept for panic messages */
	uint32_t sum = 0, partial;
	unsigned count0, count, odd, mlen_copied;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;
	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t dbuf = NULL, dbufp = NULL;

	/* walk past leading mbufs until `soff' lands inside the current one */
	while (soff > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (soff < m->m_len) {
			break;
		}
		soff -= m->m_len;
		m = m->m_next;
	}

	if (__improbable(m == NULL)) {
		panic("%s: invalid mbuf chain %p [off %d, len %d]",
		    __func__, m0, off0, len0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	sbaddr = mtod(m, uint8_t *) + soff;
	count = m->m_len - soff;        /* bytes remaining in current mbuf */
	mlen_copied = 0;

	while (len != 0) {
		/* at most one of the two cursors may need refilling here */
		ASSERT(sbaddr == NULL || dbaddr == NULL);

		/* source mbuf drained: advance to the next one in the chain */
		if (sbaddr == NULL) {
			soff = 0;
			m = m->m_next;
			if (__improbable(m == NULL)) {
				panic("%s: invalid mbuf chain %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			sbaddr = mtod(m, uint8_t *);
			count = m->m_len;
			mlen_copied = 0;
		}

		if (__improbable(count == 0)) {
			/* zero-length mbuf: skip to the next one */
			sbaddr = NULL;
			continue;
		}

		/* destination buflet full (or first pass): get the next one */
		if (dbaddr == NULL) {
			if (dbufp != NULL) {
				/* finalize length of the buflet just filled */
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				panic("%s: mbuf too large %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			dbufp = dbuf;
			/* `doff' offsets only the very first buflet */
			dlim = __buflet_get_data_limit(dbuf) - doff;
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;
			doff = 0;
		}

		/* copy no more than what remains and what fits this buflet */
		count = MIN(count, (unsigned)len);
		count0 = count = MIN(count, dlim);

		if (!do_cscum) {
			_pkt_copy(sbaddr, dbaddr, count);
			sbaddr += count;
			dbaddr += count;
			goto skip_csum;
		}

		partial = 0;
		if ((uintptr_t)sbaddr & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			count -= 1;
		}

		/* chunks starting on an odd byte need their sum byte-swapped */
		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			partial = __packet_copy_and_sum(sbaddr,
			    dbaddr, count, partial);
			sbaddr += count;
			dbaddr += count;
			if (__improbable(partial & 0xc0000000)) {
				/* fold early so the swap below cannot carry */
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}

		if (odd) {
			/* fold in the trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* 32-bit rotate by 8 == byte swap modulo 0xffff */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

skip_csum:
		dlim -= count0;
		len -= count0;
		mlen_copied += count0;

		if (dlim == 0) {
			/* current destination buflet exhausted */
			dbaddr = NULL;
		}

		count = m->m_len - soff - mlen_copied;
		if (count == 0) {
			/* current source mbuf exhausted */
			sbaddr = NULL;
		}
	}

	ASSERT(len == 0);
	ASSERT(dbuf != NULL);
	/* record actual byte count placed in the final buflet */
	__buflet_set_data_length(dbuf, (dlen0 - dlim));

	if (!do_cscum) {
		return 0;
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
1093 
1094 /*
1095  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1096  *
1097  * start/stuff is relative to moff, within [0, len], such that
1098  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1099  */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* the copy must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		/* carry the mbuf's receive checksum state over to the packet */
		pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
		pkt->pkt_csum_rx_start_off = 0;
		pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
		pkt->pkt_svc_class = m_get_service_class(m);
		/* compute a partial sum unless a full RX csum is present */
		if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
		    != CSUM_RX_FULL_FLAGS) && copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 *
			 * NOTE(review): the source offset passed below is
			 * `start', not (moff + start); callers on this path
			 * appear to use moff == 0 -- confirm.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * m_copypkt_sum() updated buflet lengths for the
			 * checksummed span; account for the leading `start'
			 * bytes copied via m_copydata() above.
			 */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				/* unaligned stuff offset: byte-wise store */
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			/* account for the leading `start' bytes copied above */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum (if any) was finalized above; clear TX csum state */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP at the IP layer */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		/* carry NECP policy attribution over to the packet */
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type from the mbuf */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1279 
1280 /*
1281  * This routine is used for copying from a packet originating from a native
1282  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1283  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1284  *
1285  * Note that this routine does not alter m_data pointer of the mbuf, as the
1286  * caller may want to use the original value upon return.  We do, however,
1287  * adjust the length to reflect the total data span.
1288  *
1289  * start/stuff is relative to poff, within [0, len], such that
1290  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1291  */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;
	uint8_t *dp;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet variant: the span must fit in one buflet */
	VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);

	/* destination must be a single packet-header mbuf */
	ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
	m->m_data += moff;
	dp = (uint8_t *)m->m_data;
	VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
	    (uint32_t)mbuf_maxlen(m));
	/* extend the mbuf to cover the bytes about to be copied */
	m->m_len += len;
	m->m_pkthdr.len += len;
	VERIFY(m->m_len == m->m_pkthdr.len &&
	    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* advertise the partial sum to the legacy stack */
			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			_pkt_copy(baddr, dp, len);
			m->m_pkthdr.csum_rx_start = 0;
			m->m_pkthdr.csum_rx_val = 0;
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset: byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		/* checksum (if any) was finalized above; clear TX csum state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type from the packet */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1445 
1446 /*
1447  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1448  * NOTE: poff is the offset within the packet.
1449  *
1450  * start/stuff is relative to poff, within [0, len], such that
1451  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1452  */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
#pragma unused(moff) /* NOTE(review): moff IS used below; pragma looks stale */
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;
	uint16_t csum = 0;
	uint8_t *baddr;
	uint8_t *dp;

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* the copy must fit within the packet's aggregate buflet space */
	VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(ph)));

	/* destination must be a single packet-header mbuf */
	ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
	m->m_data += moff;
	dp = (uint8_t *)m->m_data;
	VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
	    (uint32_t)mbuf_maxlen(m));
	/* extend the mbuf to cover the bytes about to be copied */
	m->m_len += len;
	m->m_pkthdr.len += len;
	VERIFY(m->m_len == m->m_pkthdr.len &&
	    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum(partial);

			/* advertise the partial sum to the legacy stack */
			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
			m->m_pkthdr.csum_rx_start = 0;
			m->m_pkthdr.csum_rx_val = 0;
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				/* unaligned stuff offset: byte-wise store */
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum (if any) was finalized above; clear TX csum state */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer cast type from the packet */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1608 
1609 /*
1610  * Like m_copydata(), but computes 16-bit sum as the data is copied.
1611  * Caller can provide an initial sum to be folded into the computed
1612  * sum.  The accumulated partial sum (32-bit) is returned to caller;
1613  * caller is responsible for further reducing it to 16-bit if needed,
1614  * as well as to perform the final 1's complement on it.
1615  */
1616 uint32_t
m_copydata_sum(struct mbuf * m,int off,int len,void * vp,uint32_t initial_sum,boolean_t * odd_start)1617 m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1618     boolean_t *odd_start)
1619 {
1620 	boolean_t needs_swap, started_on_odd = FALSE;
1621 	int off0 = off, len0 = len;
1622 	struct mbuf *m0 = m;
1623 	uint64_t sum, partial;
1624 	unsigned count, odd;
1625 	char *cp = vp;
1626 
1627 	if (__improbable(off < 0 || len < 0)) {
1628 		panic("%s: invalid offset %d or len %d", __func__, off, len);
1629 		/* NOTREACHED */
1630 		__builtin_unreachable();
1631 	}
1632 
1633 	while (off > 0) {
1634 		if (__improbable(m == NULL)) {
1635 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1636 			    __func__, m0, off0, len0);
1637 			/* NOTREACHED */
1638 			__builtin_unreachable();
1639 		}
1640 		if (off < m->m_len) {
1641 			break;
1642 		}
1643 		off -= m->m_len;
1644 		m = m->m_next;
1645 	}
1646 
1647 	if (odd_start) {
1648 		started_on_odd = *odd_start;
1649 	}
1650 	sum = initial_sum;
1651 
1652 	for (; len > 0; m = m->m_next) {
1653 		uint8_t *datap;
1654 
1655 		if (__improbable(m == NULL)) {
1656 			panic("%s: invalid mbuf chain %p [off %d, len %d]",
1657 			    __func__, m0, off0, len0);
1658 			/* NOTREACHED */
1659 			__builtin_unreachable();
1660 		}
1661 
1662 		datap = mtod(m, uint8_t *) + off;
1663 		count = m->m_len;
1664 
1665 		if (__improbable(count == 0)) {
1666 			continue;
1667 		}
1668 
1669 		count = MIN(count - off, (unsigned)len);
1670 		partial = 0;
1671 
1672 		if ((uintptr_t)datap & 1) {
1673 			/* Align on word boundary */
1674 			started_on_odd = !started_on_odd;
1675 #if BYTE_ORDER == LITTLE_ENDIAN
1676 			partial = *datap << 8;
1677 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1678 			partial = *datap;
1679 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1680 			*cp++ = *datap++;
1681 			count -= 1;
1682 			len -= 1;
1683 		}
1684 
1685 		needs_swap = started_on_odd;
1686 		odd = count & 1u;
1687 		count -= odd;
1688 
1689 		if (count) {
1690 			partial = __packet_copy_and_sum(datap,
1691 			    cp, count, (uint32_t)partial);
1692 			datap += count;
1693 			cp += count;
1694 			len -= count;
1695 			if (__improbable((partial & (3ULL << 62)) != 0)) {
1696 				if (needs_swap) {
1697 					partial = (partial << 8) +
1698 					    (partial >> 56);
1699 				}
1700 				sum += (partial >> 32);
1701 				sum += (partial & 0xffffffff);
1702 				partial = 0;
1703 			}
1704 		}
1705 
1706 		if (odd) {
1707 #if BYTE_ORDER == LITTLE_ENDIAN
1708 			partial += *datap;
1709 #else /* BYTE_ORDER != LITTLE_ENDIAN */
1710 			partial += *datap << 8;
1711 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
1712 			*cp++ = *datap++;
1713 			len -= 1;
1714 			started_on_odd = !started_on_odd;
1715 		}
1716 		off = 0;
1717 
1718 		if (needs_swap) {
1719 			partial = (partial << 8) + (partial >> 24);
1720 		}
1721 		sum += (partial >> 32) + (partial & 0xffffffff);
1722 		/*
1723 		 * Reduce sum to allow potential byte swap
1724 		 * in the next iteration without carry.
1725 		 */
1726 		sum = (sum >> 32) + (sum & 0xffffffff);
1727 	}
1728 
1729 	if (odd_start) {
1730 		*odd_start = started_on_odd;
1731 	}
1732 
1733 	/* Final fold (reduce 64-bit to 32-bit) */
1734 	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1735 	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */
1736 
1737 	/* return 32-bit partial sum to caller */
1738 	return (uint32_t)sum;
1739 }
1740 
1741 #if DEBUG || DEVELOPMENT
1742 #define TRAILERS_MAX    16              /* max trailing bytes */
1743 #define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
1744 static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
1745 static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1746 
1747 uint32_t
pkt_add_trailers(kern_packet_t ph,const uint32_t len,const uint16_t start)1748 pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1749 {
1750 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1751 	uint32_t extra;
1752 	uint8_t *baddr;
1753 
1754 	/* get buffer address from packet */
1755 	MD_BUFLET_ADDR_ABS(pkt, baddr);
1756 	ASSERT(baddr != NULL);
1757 	ASSERT(len <= pkt->pkt_qum.qum_pp->pp_buflet_size);
1758 
1759 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1760 	if (extra == 0 || extra > sizeof(tb) ||
1761 	    (len + extra) > pkt->pkt_qum.qum_pp->pp_buflet_size) {
1762 		return 0;
1763 	}
1764 
1765 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1766 	if (regen++ == TRAILERS_REGEN) {
1767 		read_frandom(&tb[0], sizeof(tb));
1768 		regen = 0;
1769 	}
1770 
1771 	bcopy(&tb[0], (baddr + len), extra);
1772 
1773 	/* recompute partial sum (also to exercise related logic) */
1774 	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1775 	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1776 	    ((len + extra) - start), 0);
1777 	pkt->pkt_csum_rx_start_off = start;
1778 
1779 	return extra;
1780 }
1781 
1782 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1783 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1784 {
1785 	uint32_t extra;
1786 
1787 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1788 	if (extra == 0 || extra > sizeof(tb)) {
1789 		return 0;
1790 	}
1791 
1792 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1793 		return 0;
1794 	}
1795 
1796 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1797 	if (regen++ == TRAILERS_REGEN) {
1798 		read_frandom(&tb[0], sizeof(tb));
1799 		regen = 0;
1800 	}
1801 
1802 	/* recompute partial sum (also to exercise related logic) */
1803 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1804 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1805 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1806 	m->m_pkthdr.csum_rx_start = start;
1807 
1808 	return extra;
1809 }
1810 #endif /* DEBUG || DEVELOPMENT */
1811 
/*
 * Exported wrapper around the inline _pkt_copypkt_sum(); the copy itself
 * executes as the VERIFY() argument, and VERIFY fires if it reports failure.
 */
void
pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
    uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
{
	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
}
1818 
1819 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint16_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1820 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1821     uint16_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1822 {
1823 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1824 }
1825 
1826 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1827 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1828     uint16_t len, boolean_t do_cscum)
1829 {
1830 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1831 }
1832 
1833 void
pkt_copy(void * src,void * dst,size_t len)1834 pkt_copy(void *src, void *dst, size_t len)
1835 {
1836 	return _pkt_copy(src, dst, len);
1837 }
1838