xref: /xnu-8019.80.24/bsd/skywalk/packet/packet_copy.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2017-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <machine/endian.h>
31 #include <net/necp.h>
32 
#if (DEVELOPMENT || DEBUG)
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk packet");
/* kern.skywalk.packet.trailers: when non-zero, test knob for trailing bytes */
int pkt_trailers = 0; /* for testing trailing bytes */
SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED, &pkt_trailers, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */
40 
41 
42 __attribute__((always_inline))
43 static inline void
_pkt_copy(void * src,void * dst,size_t len)44 _pkt_copy(void *src, void *dst, size_t len)
45 {
46 	if (__probable(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8))) {
47 		switch (len) {
48 		case 20:        /* standard IPv4 header */
49 			sk_copy64_20(src, dst);
50 			return;
51 
52 		case 40:        /* IPv6 header */
53 			sk_copy64_40(src, dst);
54 			return;
55 
56 		default:
57 			if (IS_P2ALIGNED(len, 64)) {
58 				sk_copy64_64x(src, dst, len);
59 				return;
60 			} else if (IS_P2ALIGNED(len, 32)) {
61 				sk_copy64_32x(src, dst, len);
62 				return;
63 			} else if (IS_P2ALIGNED(len, 8)) {
64 				sk_copy64_8x(src, dst, len);
65 				return;
66 			} else if (IS_P2ALIGNED(len, 4)) {
67 				sk_copy64_4x(src, dst, len);
68 				return;
69 			}
70 			break;
71 		}
72 	}
73 	bcopy(src, dst, len);
74 }
75 
76 /*
77  * This routine is used for copying data across two kernel packets.
78  * Can also optionally compute 16-bit partial inet checksum as the
79  * data is copied.
80  * This routine is used by flowswitch while copying packet from vp
81  * adapter pool to packet in native netif pool and vice-a-versa.
82  *
83  * start/stuff is relative to soff, within [0, len], such that
84  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
85  */
86 void
pkt_copy_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)87 pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
88     kern_packet_t sph, const uint16_t soff, const uint32_t len,
89     const boolean_t copysum, const uint16_t start, const uint16_t stuff,
90     const boolean_t invert)
91 {
92 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
93 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
94 	uint32_t partial;
95 	uint16_t csum = 0;
96 	uint8_t *sbaddr, *dbaddr;
97 
98 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
99 
100 	/* get buffer address from packet */
101 	MD_BUFLET_ADDR_ABS(spkt, sbaddr);
102 	ASSERT(sbaddr != NULL);
103 	sbaddr += soff;
104 	MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
105 	ASSERT(dbaddr != NULL);
106 	dbaddr += doff;
107 	VERIFY((doff + len) <= dpkt->pkt_qum.qum_pp->pp_buflet_size);
108 
109 	switch (t) {
110 	case NR_RX:
111 		dpkt->pkt_csum_flags = 0;
112 		if (__probable(copysum)) {
113 			/*
114 			 * Use pkt_copy() to copy the portion up to the
115 			 * point where we need to start the checksum, and
116 			 * copy the remainder, checksumming as we go.
117 			 */
118 			if (__probable(start != 0)) {
119 				_pkt_copy(sbaddr, dbaddr, start);
120 			}
121 			partial = __packet_copy_and_sum((sbaddr + start),
122 			    (dbaddr + start), (len - start), 0);
123 			csum = __packet_fold_sum(partial);
124 
125 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
126 			    start, csum, FALSE);
127 		} else {
128 			_pkt_copy(sbaddr, dbaddr, len);
129 			dpkt->pkt_csum_rx_start_off = 0;
130 			dpkt->pkt_csum_rx_value = 0;
131 		}
132 
133 		SK_DF(SK_VERB_COPY | SK_VERB_RX,
134 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
135 		    sk_proc_name_address(current_proc()),
136 		    sk_proc_pid(current_proc()), len,
137 		    (copysum ? (len - start) : 0), csum, start);
138 		SK_DF(SK_VERB_COPY | SK_VERB_RX,
139 		    "   pkt  0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
140 		    SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
141 		    (uint32_t)dpkt->pkt_csum_rx_start_off,
142 		    (uint32_t)dpkt->pkt_csum_rx_value);
143 		break;
144 
145 	case NR_TX:
146 		if (__probable(copysum)) {
147 			/*
148 			 * Use pkt_copy() to copy the portion up to the
149 			 * point where we need to start the checksum, and
150 			 * copy the remainder, checksumming as we go.
151 			 */
152 			if (__probable(start != 0)) {
153 				_pkt_copy(sbaddr, dbaddr, start);
154 			}
155 			partial = __packet_copy_and_sum((sbaddr + start),
156 			    (dbaddr + start), (len - start), 0);
157 			csum = __packet_fold_sum_final(partial);
158 
159 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
160 			if (csum == 0 && invert) {
161 				csum = 0xffff;
162 			}
163 
164 			/* Insert checksum into packet */
165 			ASSERT(stuff <= (len - sizeof(csum)));
166 			if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
167 				*(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
168 			} else {
169 				bcopy((void *)&csum, dbaddr + stuff,
170 				    sizeof(csum));
171 			}
172 		} else {
173 			_pkt_copy(sbaddr, dbaddr, len);
174 		}
175 		dpkt->pkt_csum_flags = 0;
176 		dpkt->pkt_csum_tx_start_off = 0;
177 		dpkt->pkt_csum_tx_stuff_off = 0;
178 
179 		SK_DF(SK_VERB_COPY | SK_VERB_TX,
180 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
181 		    sk_proc_name_address(current_proc()),
182 		    sk_proc_pid(current_proc()), len,
183 		    (copysum ? (len - start) : 0), csum, start);
184 		break;
185 
186 	default:
187 		VERIFY(0);
188 		/* NOTREACHED */
189 		__builtin_unreachable();
190 	}
191 	METADATA_ADJUST_LEN(dpkt, len, doff);
192 
193 	SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
194 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
195 	    (t == NR_RX) ? "RX" : "TX",
196 	    sk_dump("buf", dbaddr, len, 128, NULL, 0));
197 }
198 
199 /*
200  * NOTE: soff is the offset within the packet
201  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
202  * caller is responsible for further reducing it to 16-bit if needed,
203  * as well as to perform the final 1's complement on it.
204  */
205 uint32_t static inline
_pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint32_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)206 _pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
207     uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
208 {
209 	uint8_t odd = 0;
210 	uint8_t *sbaddr = NULL;
211 	uint32_t sum = initial_sum, partial;
212 	uint32_t len0 = len;
213 	boolean_t needs_swap, started_on_odd = FALSE;
214 	uint16_t clen, sboff, sblen, sbcnt, off0 = soff;
215 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
216 	kern_buflet_t sbuf = NULL, sbufp = NULL;
217 
218 	sbcnt = __packet_get_buflet_count(sph);
219 
220 	if (odd_start) {
221 		started_on_odd = *odd_start;
222 	}
223 
224 	/* fastpath (copy+sum, single buflet, even aligned, even length) */
225 	if (do_csum && sbcnt == 1 && len != 0) {
226 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
227 		ASSERT(sbuf != NULL);
228 		sboff = __buflet_get_data_offset(sbuf);
229 		sblen = __buflet_get_data_length(sbuf);
230 		ASSERT(sboff <= soff);
231 		ASSERT(soff < sboff + sblen);
232 		sblen -= (soff - sboff);
233 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
234 
235 		clen = (uint16_t)MIN(len, sblen);
236 
237 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
238 			sum = __packet_copy_and_sum(sbaddr, dbaddr, clen, sum);
239 			return __packet_fold_sum(sum);
240 		}
241 
242 		sbaddr = NULL;
243 		sbuf = sbufp = NULL;
244 	}
245 
246 	while (len != 0) {
247 		PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
248 		if (__improbable(sbuf == NULL)) {
249 			panic("%s: bad packet, 0x%llx [off %d, len %d]",
250 			    __func__, SK_KVA(spkt), off0, len0);
251 			/* NOTREACHED */
252 			__builtin_unreachable();
253 		}
254 		sbufp = sbuf;
255 		sboff = __buflet_get_data_offset(sbuf);
256 		sblen = __buflet_get_data_length(sbuf);
257 		ASSERT((sboff <= soff) && (soff < sboff + sblen));
258 		sblen -= (soff - sboff);
259 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
260 		soff = 0;
261 		clen = (uint16_t)MIN(len, sblen);
262 		if (__probable(do_csum)) {
263 			partial = 0;
264 			if (__improbable((uintptr_t)sbaddr & 1)) {
265 				/* Align on word boundary */
266 				started_on_odd = !started_on_odd;
267 #if BYTE_ORDER == LITTLE_ENDIAN
268 				partial = (uint8_t)*sbaddr << 8;
269 #else /* BYTE_ORDER != LITTLE_ENDIAN */
270 				partial = (uint8_t)*sbaddr;
271 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
272 				*dbaddr++ = *sbaddr++;
273 				sblen -= 1;
274 				clen -= 1;
275 				len -= 1;
276 			}
277 			needs_swap = started_on_odd;
278 
279 			odd = clen & 1u;
280 			clen -= odd;
281 
282 			if (clen != 0) {
283 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
284 				    clen, partial);
285 			}
286 
287 			if (__improbable(partial & 0xc0000000)) {
288 				if (needs_swap) {
289 					partial = (partial << 8) +
290 					    (partial >> 24);
291 				}
292 				sum += (partial >> 16);
293 				sum += (partial & 0xffff);
294 				partial = 0;
295 			}
296 		} else {
297 			_pkt_copy(sbaddr, dbaddr, clen);
298 		}
299 
300 		dbaddr += clen;
301 		sbaddr += clen;
302 
303 		if (__probable(do_csum)) {
304 			if (odd != 0) {
305 #if BYTE_ORDER == LITTLE_ENDIAN
306 				partial += (uint8_t)*sbaddr;
307 #else /* BYTE_ORDER != LITTLE_ENDIAN */
308 				partial += (uint8_t)*sbaddr << 8;
309 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
310 				*dbaddr++ = *sbaddr++;
311 				started_on_odd = !started_on_odd;
312 			}
313 
314 			if (needs_swap) {
315 				partial = (partial << 8) + (partial >> 24);
316 			}
317 			sum += (partial >> 16) + (partial & 0xffff);
318 			/*
319 			 * Reduce sum to allow potential byte swap
320 			 * in the next iteration without carry.
321 			 */
322 			sum = (sum >> 16) + (sum & 0xffff);
323 		}
324 
325 		sblen -= clen + odd;
326 		len -= clen + odd;
327 		ASSERT(sblen == 0 || len == 0);
328 	}
329 
330 	if (odd_start) {
331 		*odd_start = started_on_odd;
332 	}
333 
334 	if (__probable(do_csum)) {
335 		/* Final fold (reduce 32-bit to 16-bit) */
336 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
337 		sum = (sum >> 16) + (sum & 0xffff);
338 	}
339 	return sum;
340 }
341 
342 /*
343  * NOTE: Caller of this function is responsible to adjust the length and offset
344  * of the first buflet of the destination packet if (doff != 0),
345  * i.e. additional data is being prependend to the packet.
346  * It should also finalize the packet.
347  * To simplify & optimize the routine, we have also assumed that soff & doff
348  * will lie within the first buffer, which is true for the current use cases
349  * where, doff is the offset of the checksum field in the TCP/IP header and
350  * soff is the L3 offset.
351  * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
352  * caller is responsible for further reducing it to 16-bit if needed,
353  * as well as to perform the final 1's complement on it.
354  */
355 static inline boolean_t
_pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint32_t len,uint32_t * csum_partial,boolean_t do_csum)356 _pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
357     uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
358 {
359 	uint8_t odd = 0;
360 	uint32_t sum = 0, partial;
361 	boolean_t needs_swap, started_on_odd = FALSE;
362 	uint8_t *sbaddr = NULL, *dbaddr = NULL;
363 	uint16_t clen, sblen, dlen0, dlim, sbcnt, dbcnt, sboff;
364 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
365 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
366 	kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
367 
368 	ASSERT(csum_partial != NULL || !do_csum);
369 	sbcnt = __packet_get_buflet_count(sph);
370 	dbcnt = __packet_get_buflet_count(dph);
371 
372 	while (len != 0) {
373 		ASSERT(sbaddr == NULL || dbaddr == NULL);
374 		if (sbaddr == NULL) {
375 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
376 			if (__improbable(sbuf == NULL)) {
377 				break;
378 			}
379 			sbufp = sbuf;
380 			sblen = __buflet_get_data_length(sbuf);
381 			sboff = __buflet_get_data_offset(sbuf);
382 			ASSERT(soff >= sboff);
383 			ASSERT(sboff + sblen > soff);
384 			sblen -= (soff - sboff);
385 			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
386 			soff = 0;
387 		}
388 
389 		if (dbaddr == NULL) {
390 			if (dbufp != NULL) {
391 				__buflet_set_data_length(dbufp, dlen0);
392 			}
393 
394 			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
395 			if (__improbable(dbuf == NULL)) {
396 				break;
397 			}
398 			dbufp = dbuf;
399 			dlim = __buflet_get_data_limit(dbuf);
400 			ASSERT(dlim > doff);
401 			dlim -= doff;
402 			if (doff != 0) {
403 				VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
404 			}
405 			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
406 			dlen0 = dlim;
407 			doff = 0;
408 		}
409 
410 		clen = (uint16_t)MIN(len, sblen);
411 		clen = MIN(clen, dlim);
412 
413 		if (__probable(do_csum)) {
414 			partial = 0;
415 			if (__improbable((uintptr_t)sbaddr & 1)) {
416 				/* Align on word boundary */
417 				started_on_odd = !started_on_odd;
418 #if BYTE_ORDER == LITTLE_ENDIAN
419 				partial = (uint8_t)*sbaddr << 8;
420 #else /* BYTE_ORDER != LITTLE_ENDIAN */
421 				partial = (uint8_t)*sbaddr;
422 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
423 				*dbaddr++ = *sbaddr++;
424 				clen -= 1;
425 				dlim -= 1;
426 				len -= 1;
427 			}
428 			needs_swap = started_on_odd;
429 
430 			odd = clen & 1u;
431 			clen -= odd;
432 
433 			if (clen != 0) {
434 				partial = __packet_copy_and_sum(sbaddr, dbaddr,
435 				    clen, partial);
436 			}
437 
438 			if (__improbable(partial & 0xc0000000)) {
439 				if (needs_swap) {
440 					partial = (partial << 8) +
441 					    (partial >> 24);
442 				}
443 				sum += (partial >> 16);
444 				sum += (partial & 0xffff);
445 				partial = 0;
446 			}
447 		} else {
448 			_pkt_copy(sbaddr, dbaddr, clen);
449 		}
450 		sbaddr += clen;
451 		dbaddr += clen;
452 
453 		if (__probable(do_csum)) {
454 			if (odd != 0) {
455 #if BYTE_ORDER == LITTLE_ENDIAN
456 				partial += (uint8_t)*sbaddr;
457 #else /* BYTE_ORDER != LITTLE_ENDIAN */
458 				partial += (uint8_t)*sbaddr << 8;
459 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
460 				*dbaddr++ = *sbaddr++;
461 				started_on_odd = !started_on_odd;
462 			}
463 
464 			if (needs_swap) {
465 				partial = (partial << 8) + (partial >> 24);
466 			}
467 			sum += (partial >> 16) + (partial & 0xffff);
468 			/*
469 			 * Reduce sum to allow potential byte swap
470 			 * in the next iteration without carry.
471 			 */
472 			sum = (sum >> 16) + (sum & 0xffff);
473 		}
474 
475 		sblen -= clen + odd;
476 		dlim -= clen + odd;
477 		len -= clen + odd;
478 
479 		if (sblen == 0) {
480 			sbaddr = NULL;
481 		}
482 
483 		if (dlim == 0) {
484 			dbaddr = NULL;
485 		}
486 	}
487 
488 	if (__probable(dbuf != NULL)) {
489 		__buflet_set_data_length(dbuf, (dlen0 - dlim));
490 	}
491 	if (__probable(do_csum)) {
492 		/* Final fold (reduce 32-bit to 16-bit) */
493 		sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
494 		sum = (sum >> 16) + (sum & 0xffff);
495 		*csum_partial = (uint32_t)sum;
496 	}
497 	return len == 0;
498 }
499 
500 uint32_t
pkt_sum(kern_packet_t sph,uint16_t soff,uint16_t len)501 pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
502 {
503 	uint8_t odd = 0;
504 	uint32_t sum = 0, partial;
505 	boolean_t needs_swap, started_on_odd = FALSE;
506 	uint8_t *sbaddr = NULL;
507 	uint16_t clen, sblen, sbcnt, sboff;
508 	struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
509 	kern_buflet_t sbuf = NULL, sbufp = NULL;
510 
511 	sbcnt = __packet_get_buflet_count(sph);
512 
513 	/* fastpath (single buflet, even aligned, even length) */
514 	if (sbcnt == 1 && len != 0) {
515 		PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
516 		ASSERT(sbuf != NULL);
517 		sblen = __buflet_get_data_length(sbuf);
518 		sboff = __buflet_get_data_offset(sbuf);
519 		ASSERT(soff >= sboff);
520 		ASSERT(sboff + sblen > soff);
521 		sblen -= (soff - sboff);
522 		sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
523 
524 		clen = MIN(len, sblen);
525 
526 		if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
527 			sum = __packet_cksum(sbaddr, clen, 0);
528 			return __packet_fold_sum(sum);
529 		}
530 
531 		sbaddr = NULL;
532 		sbuf = sbufp = NULL;
533 	}
534 
535 	/* slowpath */
536 	while (len != 0) {
537 		ASSERT(sbaddr == NULL);
538 		if (sbaddr == NULL) {
539 			PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
540 			if (__improbable(sbuf == NULL)) {
541 				break;
542 			}
543 			sbufp = sbuf;
544 			sblen = __buflet_get_data_length(sbuf);
545 			sboff = __buflet_get_data_offset(sbuf);
546 			ASSERT(soff >= sboff);
547 			ASSERT(sboff + sblen > soff);
548 			sblen -= (soff - sboff);
549 			sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(sbuf) + soff);
550 			soff = 0;
551 		}
552 
553 		clen = MIN(len, sblen);
554 
555 		partial = 0;
556 		if (__improbable((uintptr_t)sbaddr & 1)) {
557 			/* Align on word boundary */
558 			started_on_odd = !started_on_odd;
559 #if BYTE_ORDER == LITTLE_ENDIAN
560 			partial = (uint8_t)*sbaddr << 8;
561 #else /* BYTE_ORDER != LITTLE_ENDIAN */
562 			partial = (uint8_t)*sbaddr;
563 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
564 			clen -= 1;
565 			len -= 1;
566 		}
567 		needs_swap = started_on_odd;
568 
569 		odd = clen & 1u;
570 		clen -= odd;
571 
572 		if (clen != 0) {
573 			partial = __packet_cksum(sbaddr,
574 			    clen, partial);
575 		}
576 
577 		if (__improbable(partial & 0xc0000000)) {
578 			if (needs_swap) {
579 				partial = (partial << 8) +
580 				    (partial >> 24);
581 			}
582 			sum += (partial >> 16);
583 			sum += (partial & 0xffff);
584 			partial = 0;
585 		}
586 		sbaddr += clen;
587 
588 		if (odd != 0) {
589 #if BYTE_ORDER == LITTLE_ENDIAN
590 			partial += (uint8_t)*sbaddr;
591 #else /* BYTE_ORDER != LITTLE_ENDIAN */
592 			partial += (uint8_t)*sbaddr << 8;
593 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
594 			started_on_odd = !started_on_odd;
595 		}
596 
597 		if (needs_swap) {
598 			partial = (partial << 8) + (partial >> 24);
599 		}
600 		sum += (partial >> 16) + (partial & 0xffff);
601 		/*
602 		 * Reduce sum to allow potential byte swap
603 		 * in the next iteration without carry.
604 		 */
605 		sum = (sum >> 16) + (sum & 0xffff);
606 
607 		sblen -= clen + odd;
608 		len -= clen + odd;
609 
610 		if (sblen == 0) {
611 			sbaddr = NULL;
612 		}
613 	}
614 
615 	/* Final fold (reduce 32-bit to 16-bit) */
616 	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
617 	sum = (sum >> 16) + (sum & 0xffff);
618 	return (uint32_t)sum;
619 }
620 
621 
622 /*
623  * This is a multi-buflet variant of pkt_copy_from_pkt().
624  *
625  * start/stuff is relative to soff, within [0, len], such that
626  * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
627  */
628 void
pkt_copy_multi_buflet_from_pkt(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t copysum,const uint16_t start,const uint16_t stuff,const boolean_t invert)629 pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
630     const uint16_t doff, kern_packet_t sph, const uint16_t soff,
631     const uint32_t len, const boolean_t copysum, const uint16_t start,
632     const uint16_t stuff, const boolean_t invert)
633 {
634 	boolean_t rc;
635 	uint32_t partial;
636 	uint16_t csum = 0;
637 	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
638 
639 	VERIFY((doff + len) <=
640 	    (dpkt->pkt_qum.qum_pp->pp_buflet_size *
641 	    __packet_get_buflet_count(dph)));
642 
643 	switch (t) {
644 	case NR_RX:
645 		dpkt->pkt_csum_flags = 0;
646 		if (__probable(copysum)) {
647 			/*
648 			 * copy the portion up to the point where we need to
649 			 * start the checksum, and copy the remainder,
650 			 * checksumming as we go.
651 			 */
652 			if (__probable(start != 0)) {
653 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
654 				    start, NULL, FALSE);
655 				ASSERT(rc);
656 			}
657 			_pkt_copypkt_sum(sph, (soff + start), dph,
658 			    (doff + start), (len - start), &partial, TRUE);
659 			csum = __packet_fold_sum(partial);
660 			__packet_set_inet_checksum(dph, PACKET_CSUM_PARTIAL,
661 			    start, csum, FALSE);
662 			METADATA_ADJUST_LEN(dpkt, start, doff);
663 		} else {
664 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
665 			    FALSE);
666 			ASSERT(rc);
667 			dpkt->pkt_csum_rx_start_off = 0;
668 			dpkt->pkt_csum_rx_value = 0;
669 		}
670 		break;
671 
672 	case NR_TX:
673 		if (__probable(copysum)) {
674 			uint8_t *baddr;
675 			/*
676 			 * copy the portion up to the point where we need to
677 			 * start the checksum, and copy the remainder,
678 			 * checksumming as we go.
679 			 */
680 			if (__probable(start != 0)) {
681 				rc = _pkt_copypkt_sum(sph, soff, dph, doff,
682 				    start, NULL, FALSE);
683 				ASSERT(rc);
684 			}
685 			rc = _pkt_copypkt_sum(sph, (soff + start), dph,
686 			    (doff + start), (len - start), &partial, TRUE);
687 			ASSERT(rc);
688 			csum = __packet_fold_sum_final(partial);
689 
690 			/* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
691 			if (csum == 0 && invert) {
692 				csum = 0xffff;
693 			}
694 
695 			/*
696 			 * Insert checksum into packet.
697 			 * Here we assume that checksum will be in the
698 			 * first buffer.
699 			 */
700 			ASSERT((stuff + doff + sizeof(csum)) <=
701 			    dpkt->pkt_qum.qum_pp->pp_buflet_size);
702 			ASSERT(stuff <= (len - sizeof(csum)));
703 
704 			/* get first buflet buffer address from packet */
705 			MD_BUFLET_ADDR_ABS(dpkt, baddr);
706 			ASSERT(baddr != NULL);
707 			baddr += doff;
708 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
709 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
710 			} else {
711 				bcopy((void *)&csum, baddr + stuff,
712 				    sizeof(csum));
713 			}
714 			METADATA_ADJUST_LEN(dpkt, start, doff);
715 		} else {
716 			rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
717 			    FALSE);
718 			ASSERT(rc);
719 		}
720 		dpkt->pkt_csum_flags = 0;
721 		dpkt->pkt_csum_tx_start_off = 0;
722 		dpkt->pkt_csum_tx_stuff_off = 0;
723 		break;
724 
725 	default:
726 		VERIFY(0);
727 		/* NOTREACHED */
728 		__builtin_unreachable();
729 	}
730 }
731 
732 /*
733  * This routine is used for copying an mbuf which originated in the host
734  * stack destined to a native skywalk interface (NR_TX), as well as for
735  * mbufs originating on compat network interfaces (NR_RX).
736  *
737  * start/stuff is relative to moff, within [0, len], such that
738  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
739  */
740 void
pkt_copy_from_mbuf(const enum txrx t,kern_packet_t ph,const uint16_t poff,struct mbuf * m,const uint16_t moff,const uint32_t len,const boolean_t copysum,const uint16_t start)741 pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
742     struct mbuf *m, const uint16_t moff, const uint32_t len,
743     const boolean_t copysum, const uint16_t start)
744 {
745 	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
746 	uint32_t partial;
747 	uint16_t csum = 0;
748 	uint8_t *baddr;
749 
750 	_CASSERT(sizeof(csum) == sizeof(uint16_t));
751 
752 	/* get buffer address from packet */
753 	MD_BUFLET_ADDR_ABS(pkt, baddr);
754 	ASSERT(baddr != NULL);
755 	baddr += poff;
756 	VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);
757 
758 	switch (t) {
759 	case NR_RX:
760 		pkt->pkt_csum_flags = 0;
761 		pkt->pkt_svc_class = m_get_service_class(m);
762 		if (__probable(copysum)) {
763 			/*
764 			 * Use m_copydata() to copy the portion up to the
765 			 * point where we need to start the checksum, and
766 			 * copy the remainder, checksumming as we go.
767 			 */
768 			if (start != 0) {
769 				m_copydata(m, moff, start, baddr);
770 			}
771 			partial = m_copydata_sum(m, start, (len - start),
772 			    (baddr + start), 0, NULL);
773 			csum = __packet_fold_sum(partial);
774 
775 			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
776 			    start, csum, FALSE);
777 		} else {
778 			m_copydata(m, moff, len, baddr);
779 			pkt->pkt_csum_rx_start_off = 0;
780 			pkt->pkt_csum_rx_value = 0;
781 		}
782 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
783 		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
784 		    sk_proc_name_address(current_proc()),
785 		    sk_proc_pid(current_proc()), len,
786 		    (copysum ? (len - start) : 0), csum, start);
787 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
788 		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
789 		    SK_KVA(m), m->m_pkthdr.csum_flags,
790 		    (uint32_t)m->m_pkthdr.csum_rx_start,
791 		    (uint32_t)m->m_pkthdr.csum_rx_val);
792 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
793 		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
794 		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
795 		    (uint32_t)pkt->pkt_csum_rx_start_off,
796 		    (uint32_t)pkt->pkt_csum_rx_value);
797 		break;
798 
799 	case NR_TX:
800 		if (__probable(copysum)) {
801 			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
802 			/*
803 			 * Use m_copydata() to copy the portion up to the
804 			 * point where we need to start the checksum, and
805 			 * copy the remainder, checksumming as we go.
806 			 */
807 			if (start != 0) {
808 				m_copydata(m, moff, start, baddr);
809 			}
810 			partial = m_copydata_sum(m, start, (len - start),
811 			    (baddr + start), 0, NULL);
812 			csum = __packet_fold_sum_final(partial);
813 
814 			/*
815 			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
816 			 * ideally we'd only test for CSUM_ZERO_INVERT
817 			 * here, but catch cases where the originator
818 			 * did not set it for UDP.
819 			 */
820 			if (csum == 0 && (m->m_pkthdr.csum_flags &
821 			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
822 				csum = 0xffff;
823 			}
824 
825 			/* Insert checksum into packet */
826 			ASSERT(stuff <= (len - sizeof(csum)));
827 			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
828 				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
829 			} else {
830 				bcopy((void *)&csum, baddr + stuff,
831 				    sizeof(csum));
832 			}
833 		} else {
834 			m_copydata(m, moff, len, baddr);
835 		}
836 		pkt->pkt_csum_flags = 0;
837 		pkt->pkt_csum_tx_start_off = 0;
838 		pkt->pkt_csum_tx_stuff_off = 0;
839 
840 		/* translate mbuf metadata */
841 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
842 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
843 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
844 		switch (m->m_pkthdr.pkt_proto) {
845 		case IPPROTO_QUIC:
846 			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
847 			pkt->pkt_transport_protocol = IPPROTO_QUIC;
848 			break;
849 
850 		default:
851 			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
852 			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
853 			break;
854 		}
855 		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
856 		pkt->pkt_svc_class = m_get_service_class(m);
857 		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
858 		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
859 		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
860 			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
861 		}
862 		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
863 		pkt->pkt_policy_id =
864 		    (uint32_t)necp_get_policy_id_from_packet(m);
865 
866 		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
867 			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
868 				__packet_set_tx_completion_data(ph,
869 				    m->m_pkthdr.drv_tx_compl_arg,
870 				    m->m_pkthdr.drv_tx_compl_data);
871 			}
872 			pkt->pkt_tx_compl_context =
873 			    m->m_pkthdr.pkt_compl_context;
874 			pkt->pkt_tx_compl_callbacks =
875 			    m->m_pkthdr.pkt_compl_callbacks;
876 			/*
877 			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
878 			 * mbuf can no longer trigger a completion callback.
879 			 * callback will be invoked when the kernel packet is
880 			 * completed.
881 			 */
882 			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
883 
884 			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
885 		}
886 
887 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
888 		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
889 		    sk_proc_name_address(current_proc()),
890 		    sk_proc_pid(current_proc()), len,
891 		    (copysum ? (len - start) : 0), csum, start);
892 		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
893 		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
894 		    SK_KVA(m), m->m_pkthdr.csum_flags,
895 		    (uint32_t)m->m_pkthdr.csum_tx_start,
896 		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
897 		break;
898 
899 	default:
900 		VERIFY(0);
901 		/* NOTREACHED */
902 		__builtin_unreachable();
903 	}
904 	METADATA_ADJUST_LEN(pkt, len, poff);
905 
906 	if (m->m_flags & M_BCAST) {
907 		__packet_set_link_broadcast(ph);
908 	} else if (m->m_flags & M_MCAST) {
909 		__packet_set_link_multicast(ph);
910 	}
911 
912 	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
913 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
914 	    (t == NR_RX) ? "RX" : "TX",
915 	    sk_dump("buf", baddr, len, 128, NULL, 0));
916 }
917 
918 /*
919  * Like m_copydata_sum(), but works on a destination kernel packet.
920  */
static inline uint32_t
m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
    uint32_t len, boolean_t do_cscum)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = soff;		/* original offset, kept for panic messages */
	uint32_t len0 = len;		/* original length, kept for panic messages */
	struct mbuf *m0 = m;		/* head of chain, kept for panic messages */
	uint32_t sum = 0, partial;	/* folded / in-flight checksum accumulators */
	unsigned count0, count, odd, mlen_copied;
	uint8_t *sbaddr = NULL, *dbaddr = NULL;	/* current source/destination bytes */
	uint16_t dlen0, dlim, dbcnt = __packet_get_buflet_count(dph);
	struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
	kern_buflet_t dbuf = NULL, dbufp = NULL;

	/* advance to the mbuf containing offset 'soff' in the chain */
	while (soff > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (soff < m->m_len) {
			break;
		}
		soff -= m->m_len;
		m = m->m_next;
	}

	if (__improbable(m == NULL)) {
		panic("%s: invalid mbuf chain %p [off %d, len %d]",
		    __func__, m0, off0, len0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	sbaddr = mtod(m, uint8_t *) + soff;	/* first source byte */
	count = m->m_len - soff;		/* bytes left in this mbuf */
	mlen_copied = 0;			/* bytes consumed from current mbuf */

	while (len != 0) {
		/* at most one side (source or destination) needs refilling */
		ASSERT(sbaddr == NULL || dbaddr == NULL);
		if (sbaddr == NULL) {
			/* source mbuf exhausted; move to the next in chain */
			soff = 0;
			m = m->m_next;
			if (__improbable(m == NULL)) {
				panic("%s: invalid mbuf chain %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			sbaddr = mtod(m, uint8_t *);
			count = m->m_len;
			mlen_copied = 0;
		}

		if (__improbable(count == 0)) {
			/* zero-length mbuf; skip to the next one */
			sbaddr = NULL;
			continue;
		}

		if (dbaddr == NULL) {
			/*
			 * Destination buflet exhausted (or first pass):
			 * finalize the previous buflet's data length and
			 * grab the next one.  'doff' applies only to the
			 * first buflet and is zeroed afterwards.
			 */
			if (dbufp != NULL) {
				__buflet_set_data_length(dbufp, dlen0);
			}

			PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
			if (__improbable(dbuf == NULL)) {
				panic("%s: mbuf too large %p [off %d, "
				    "len %d]", __func__, m0, off0, len0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			dbufp = dbuf;
			dlim = __buflet_get_data_limit(dbuf) - doff;
			dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(dbuf) + doff);
			dlen0 = dlim;
			doff = 0;
		}

		/* copy at most what fits in this mbuf, 'len', and buflet */
		count = MIN(count, (unsigned)len);
		count0 = count = MIN(count, dlim);

		if (!do_cscum) {
			/* plain copy; no checksum accumulation */
			_pkt_copy(sbaddr, dbaddr, count);
			sbaddr += count;
			dbaddr += count;
			goto skip_csum;
		}

		partial = 0;
		if ((uintptr_t)sbaddr & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *sbaddr << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *sbaddr;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			count -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			/* bulk copy+sum of the even-sized middle portion */
			partial = __packet_copy_and_sum(sbaddr,
			    dbaddr, count, partial);
			sbaddr += count;
			dbaddr += count;
			/* fold early to avoid overflow before a byte swap */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}

		if (odd) {
			/* account for a trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *sbaddr;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *sbaddr << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*dbaddr++ = *sbaddr++;
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* 32-bit byte rotate to fix odd-offset parity */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);

skip_csum:
		dlim -= count0;
		len -= count0;
		mlen_copied += count0;

		if (dlim == 0) {
			/* destination buflet full; refill on next pass */
			dbaddr = NULL;
		}

		count = m->m_len - soff - mlen_copied;
		if (count == 0) {
			/* source mbuf drained; refill on next pass */
			sbaddr = NULL;
		}
	}

	ASSERT(len == 0);
	ASSERT(dbuf != NULL);
	/* set the data length of the final (possibly partial) buflet */
	__buflet_set_data_length(dbuf, (dlen0 - dlim));

	if (!do_cscum) {
		return 0;
	}

	/* Final fold (reduce 32-bit to 16-bit) */
	sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
1092 
1093 /*
1094  * This is a multi-buflet variant of pkt_copy_from_mbuf().
1095  *
1096  * start/stuff is relative to moff, within [0, len], such that
1097  * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1098  */
void
pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* running 32-bit partial sum */
	uint16_t csum = 0;	/* folded 16-bit checksum */
	uint8_t *baddr;		/* first buflet data address + poff */

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* multi-buflet variant: span may cover all buflets of the packet */
	VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(ph)));

	switch (t) {
	case NR_RX:
		pkt->pkt_csum_flags = 0;
		if (__probable(copysum)) {
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 *
			 * NOTE(review): this path passes 'start' (not
			 * moff + start) as the mbuf offset to
			 * m_copypkt_sum(), unlike the non-copysum path
			 * which uses 'moff' -- presumably callers use
			 * moff == 0 here; confirm.  The TX path below
			 * does the same.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum(partial);
			__packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
			    start, csum, FALSE);
			/*
			 * m_copypkt_sum() sets buflet data lengths for the
			 * summed portion; only the m_copydata()'d prefix
			 * ('start' bytes) is adjusted here.
			 */
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			(void) m_copypkt_sum(m, moff, ph, poff, len, FALSE);
			pkt->pkt_csum_rx_start_off = 0;
			pkt->pkt_csum_rx_value = 0;
		}
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
			/*
			 * Use m_copydata() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (start != 0) {
				m_copydata(m, moff, start, baddr);
			}
			partial = m_copypkt_sum(m, start, ph, (poff + start),
			    (len - start), TRUE);
			csum = __packet_fold_sum_final(partial);

			/*
			 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
			 * ideally we'd only test for CSUM_ZERO_INVERT
			 * here, but catch cases where the originator
			 * did not set it for UDP.
			 */
			if (csum == 0 && (m->m_pkthdr.csum_flags &
			    (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
			} else {
				bcopy((void *)&csum, baddr + stuff,
				    sizeof(csum));
			}
			METADATA_ADJUST_LEN(pkt, start, poff);
		} else {
			m_copypkt_sum(m, moff, ph, poff, len, FALSE);
		}
		/* checksum has been finalized; clear offload metadata */
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;

		/* translate mbuf metadata */
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		switch (m->m_pkthdr.pkt_proto) {
		case IPPROTO_QUIC:
			/* QUIC rides on UDP; expose UDP as the IP proto */
			pkt->pkt_flow_ip_proto = IPPROTO_UDP;
			pkt->pkt_transport_protocol = IPPROTO_QUIC;
			break;

		default:
			pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
			pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
			break;
		}
		(void) mbuf_get_timestamp(m, &pkt->pkt_timestamp, NULL);
		pkt->pkt_svc_class = m_get_service_class(m);
		pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
		pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
		if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
			pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
		}
		necp_get_app_uuid_from_packet(m, pkt->pkt_policy_euuid);
		pkt->pkt_policy_id =
		    (uint32_t)necp_get_policy_id_from_packet(m);

		if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
			/* carry driver completion data over to the packet */
			if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
				__packet_set_tx_completion_data(ph,
				    m->m_pkthdr.drv_tx_compl_arg,
				    m->m_pkthdr.drv_tx_compl_data);
			}
			pkt->pkt_tx_compl_context =
			    m->m_pkthdr.pkt_compl_context;
			pkt->pkt_tx_compl_callbacks =
			    m->m_pkthdr.pkt_compl_callbacks;
			/*
			 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
			 * mbuf can no longer trigger a completion callback.
			 * callback will be invoked when the kernel packet is
			 * completed.
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;

			m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(m), m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_tx_start,
		    (uint32_t)m->m_pkthdr.csum_tx_stuff);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer broadcast/multicast indication */
	if (m->m_flags & M_BCAST) {
		__packet_set_link_broadcast(ph);
	} else if (m->m_flags & M_MCAST) {
		__packet_set_link_multicast(ph);
	}

	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", baddr, len, 128, NULL, 0));
}
1276 
1277 /*
1278  * This routine is used for copying from a packet originating from a native
1279  * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1280  * as well as for mbufs destined for the compat network interfaces (NR_TX).
1281  *
 * Note that this routine is described as not altering the m_data pointer
 * of the mbuf (so the caller may use the original value upon return);
 * however, the body below does advance m_data by moff -- confirm against
 * callers.  The mbuf length is adjusted to reflect the total data span.
1285  *
1286  * start/stuff is relative to poff, within [0, len], such that
1287  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1288  */
void
pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
    struct mbuf *m, const uint16_t moff, const uint32_t len,
    const boolean_t copysum, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* running 32-bit partial sum */
	uint16_t csum = 0;	/* folded 16-bit checksum */
	uint8_t *baddr;		/* source: packet buffer + poff */
	uint8_t *dp;		/* destination: mbuf data + moff */

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* single-buflet variant: the whole span must fit in one buflet */
	VERIFY((poff + len) <= pkt->pkt_qum.qum_pp->pp_buflet_size);

	/* single mbuf only; position at 'moff' and grow lengths by 'len' */
	ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
	m->m_data += moff;
	dp = (uint8_t *)m->m_data;
	VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
	    (uint32_t)mbuf_maxlen(m));
	m->m_len += len;
	m->m_pkthdr.len += len;
	VERIFY(m->m_len == m->m_pkthdr.len &&
	    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum(partial);

			/* hand the partial sum to the legacy stack */
			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			_pkt_copy(baddr, dp, len);
			m->m_pkthdr.csum_rx_start = 0;
			m->m_pkthdr.csum_rx_val = 0;
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = __packet_copy_and_sum((baddr + start),
			    (dp + start), (len - start), 0);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			_pkt_copy(baddr, dp, len);
		}
		/* checksum has been finalized; clear offload metadata */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer broadcast/multicast indication */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1442 
1443 /*
1444  * This is a multi-buflet variant of pkt_copy_to_mbuf().
1445  * NOTE: poff is the offset within the packet.
1446  *
1447  * start/stuff is relative to poff, within [0, len], such that
1448  * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1449  */
void
pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
    const uint16_t poff, struct mbuf *m, const uint16_t moff,
    const uint32_t len, const boolean_t copysum, const uint16_t start)
{
#pragma unused(moff) /* may be PROC_NULL */
	/*
	 * NOTE(review): 'moff' is in fact used below (m->m_data += moff),
	 * so the pragma above and its comment appear stale -- confirm.
	 */
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t partial;	/* running 32-bit partial sum */
	uint16_t csum = 0;	/* folded 16-bit checksum */
	uint8_t *baddr;		/* source: first buflet data + poff */
	uint8_t *dp;		/* destination: mbuf data + moff */

	_CASSERT(sizeof(csum) == sizeof(uint16_t));

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	baddr += poff;
	/* multi-buflet variant: span may cover all buflets of the packet */
	VERIFY((poff + len) <= (pkt->pkt_qum.qum_pp->pp_buflet_size *
	    __packet_get_buflet_count(ph)));

	/* single mbuf only; position at 'moff' and grow lengths by 'len' */
	ASSERT((m->m_flags & M_PKTHDR) && m->m_next == NULL);
	m->m_data += moff;
	dp = (uint8_t *)m->m_data;
	VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
	    (uint32_t)mbuf_maxlen(m));
	m->m_len += len;
	m->m_pkthdr.len += len;
	VERIFY(m->m_len == m->m_pkthdr.len &&
	    (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));

	switch (t) {
	case NR_RX:
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
		if (__probable(copysum)) {
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum(partial);

			/* hand the partial sum to the legacy stack */
			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_rx_start = start;
			m->m_pkthdr.csum_rx_val = csum;
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
			m->m_pkthdr.csum_rx_start = 0;
			m->m_pkthdr.csum_rx_val = 0;
		}

		/* translate packet metadata */
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(m), moff, m->m_pkthdr.csum_flags,
		    (uint32_t)m->m_pkthdr.csum_rx_start,
		    (uint32_t)m->m_pkthdr.csum_rx_val);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
		    "   pkt  0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_rx_start_off,
		    (uint32_t)pkt->pkt_csum_rx_value);
		break;

	case NR_TX:
		if (__probable(copysum)) {
			uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
			/*
			 * Use pkt_copy() to copy the portion up to the
			 * point where we need to start the checksum, and
			 * copy the remainder, checksumming as we go.
			 */
			if (__probable(start != 0)) {
				_pkt_copy(baddr, dp, start);
			}
			partial = _pkt_copyaddr_sum(ph, (poff + start),
			    (dp + start), (len - start), TRUE, 0, NULL);
			csum = __packet_fold_sum_final(partial);

			/* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
			if (csum == 0 &&
			    (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
				csum = 0xffff;
			}

			/* Insert checksum into packet */
			ASSERT(stuff <= (len - sizeof(csum)));
			if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
				*(uint16_t *)(uintptr_t)(dp + stuff) = csum;
			} else {
				bcopy((void *)&csum, dp + stuff, sizeof(csum));
			}
		} else {
			(void) _pkt_copyaddr_sum(ph, poff, dp, len, FALSE, 0, NULL);
		}
		/* checksum has been finalized; clear offload metadata */
		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
		m->m_pkthdr.csum_tx_start = 0;
		m->m_pkthdr.csum_tx_stuff = 0;

		/* translate packet metadata */
		m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
		m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
		m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
		m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
		m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
		mbuf_set_timestamp(m, pkt->pkt_timestamp,
		    ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
		m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
		m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
		if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
			m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
		}

		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
		    sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), len,
		    (copysum ? (len - start) : 0), csum, start);
		SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
		    "   pkt  0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
		    SK_KVA(pkt), poff, pkt->pkt_csum_flags,
		    (uint32_t)pkt->pkt_csum_tx_start_off,
		    (uint32_t)pkt->pkt_csum_tx_stuff_off);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* propagate link-layer broadcast/multicast indication */
	if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
		m->m_flags |= M_BCAST;
	} else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
		m->m_flags |= M_MCAST;
	}
	SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    (t == NR_RX) ? "RX" : "TX",
	    sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
}
1605 
1606 /*
1607  * Like m_copydata(), but computes 16-bit sum as the data is copied.
1608  * Caller can provide an initial sum to be folded into the computed
1609  * sum.  The accumulated partial sum (32-bit) is returned to caller;
1610  * caller is responsible for further reducing it to 16-bit if needed,
1611  * as well as to perform the final 1's complement on it.
1612  */
uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
    boolean_t *odd_start)
{
	boolean_t needs_swap, started_on_odd = FALSE;
	int off0 = off, len0 = len;	/* originals, kept for panic messages */
	struct mbuf *m0 = m;		/* head of chain, kept for panic messages */
	uint64_t sum, partial;		/* 64-bit checksum accumulators */
	unsigned count, odd;
	char *cp = vp;			/* destination cursor */

	if (__improbable(off < 0 || len < 0)) {
		panic("%s: invalid offset %d or len %d", __func__, off, len);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* advance to the mbuf containing offset 'off' in the chain */
	while (off > 0) {
		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (off < m->m_len) {
			break;
		}
		off -= m->m_len;
		m = m->m_next;
	}

	/* resume byte parity from the caller, if provided */
	if (odd_start) {
		started_on_odd = *odd_start;
	}
	sum = initial_sum;

	for (; len > 0; m = m->m_next) {
		uint8_t *datap;

		if (__improbable(m == NULL)) {
			panic("%s: invalid mbuf chain %p [off %d, len %d]",
			    __func__, m0, off0, len0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		datap = mtod(m, uint8_t *) + off;
		count = m->m_len;

		if (__improbable(count == 0)) {
			/* zero-length mbuf; skip it */
			continue;
		}

		count = MIN(count - off, (unsigned)len);
		partial = 0;

		if ((uintptr_t)datap & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *datap << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *datap;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			count -= 1;
			len -= 1;
		}

		needs_swap = started_on_odd;
		odd = count & 1u;
		count -= odd;

		if (count) {
			/* bulk copy+sum of the even-sized middle portion */
			partial = __packet_copy_and_sum(datap,
			    cp, count, (uint32_t)partial);
			datap += count;
			cp += count;
			len -= count;
			/*
			 * NOTE(review): this guard tests bits 62-63 of
			 * 'partial'; whether they can ever be set depends
			 * on the width returned by __packet_copy_and_sum()
			 * -- confirm.
			 */
			if (__improbable((partial & (3ULL << 62)) != 0)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}

		if (odd) {
			/* account for a trailing odd byte */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *datap;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *datap << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			*cp++ = *datap++;
			len -= 1;
			started_on_odd = !started_on_odd;
		}
		off = 0;	/* offset applies to the first mbuf only */

		if (needs_swap) {
			/*
			 * NOTE(review): the overflow branch above rotates
			 * the 64-bit 'partial' with >> 56, but this path
			 * uses >> 24 (the 32-bit rotate used by
			 * m_copypkt_sum()) -- confirm this is intended for
			 * the 64-bit accumulator.
			 */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}

	/* report final byte parity back to the caller */
	if (odd_start) {
		*odd_start = started_on_odd;
	}

	/* Final fold (reduce 64-bit to 32-bit) */
	sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
	sum = (sum >> 16) + (sum & 0xffff);     /* 17-bit + carry */

	/* return 32-bit partial sum to caller */
	return (uint32_t)sum;
}
1737 
#if DEBUG || DEVELOPMENT
/*
 * State for pkt_add_trailers()/pkt_add_trailers_mbuf() below: a small
 * pool of random bytes appended to packets for testing, refreshed
 * roughly every TRAILERS_REGEN packets.
 */
#define TRAILERS_MAX    16              /* max trailing bytes */
#define TRAILERS_REGEN  (64 * 1024)     /* regeneration threshold */
static uint8_t tb[TRAILERS_MAX];        /* random trailing bytes */
static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1743 
/*
 * Debug-only: append up to 'pkt_trailers' random bytes after the first
 * 'len' bytes of packet payload and recompute the partial receive
 * checksum from 'start'.  Returns the number of bytes appended, or 0
 * if trailers are disabled or there is no room in the buflet.
 */
uint32_t
pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
{
	struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
	uint32_t extra;
	uint8_t *baddr;

	/* get buffer address from packet */
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	ASSERT(baddr != NULL);
	ASSERT(len <= pkt->pkt_qum.qum_pp->pp_buflet_size);

	/* number of trailer bytes, capped and bounds-checked */
	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
	if (extra == 0 || extra > sizeof(tb) ||
	    (len + extra) > pkt->pkt_qum.qum_pp->pp_buflet_size) {
		return 0;
	}

	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
	if (regen++ == TRAILERS_REGEN) {
		read_frandom(&tb[0], sizeof(tb));
		regen = 0;
	}

	/* append the trailer bytes just past the current payload */
	bcopy(&tb[0], (baddr + len), extra);

	/* recompute partial sum (also to exercise related logic) */
	pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
	pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
	    ((len + extra) - start), 0);
	pkt->pkt_csum_rx_start_off = start;

	return extra;
}
1778 
1779 uint32_t
pkt_add_trailers_mbuf(struct mbuf * m,const uint16_t start)1780 pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
1781 {
1782 	uint32_t extra;
1783 
1784 	extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1785 	if (extra == 0 || extra > sizeof(tb)) {
1786 		return 0;
1787 	}
1788 
1789 	if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
1790 		return 0;
1791 	}
1792 
1793 	/* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1794 	if (regen++ == TRAILERS_REGEN) {
1795 		read_frandom(&tb[0], sizeof(tb));
1796 		regen = 0;
1797 	}
1798 
1799 	/* recompute partial sum (also to exercise related logic) */
1800 	m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
1801 	m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1802 	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
1803 	m->m_pkthdr.csum_rx_start = start;
1804 
1805 	return extra;
1806 }
1807 #endif /* DEBUG || DEVELOPMENT */
1808 
1809 void
pkt_copypkt_sum(kern_packet_t sph,uint16_t soff,kern_packet_t dph,uint16_t doff,uint16_t len,uint32_t * partial,boolean_t do_csum)1810 pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
1811     uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
1812 {
1813 	VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
1814 }
1815 
1816 uint32_t
pkt_copyaddr_sum(kern_packet_t sph,uint16_t soff,uint8_t * dbaddr,uint16_t len,boolean_t do_csum,uint32_t initial_sum,boolean_t * odd_start)1817 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
1818     uint16_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
1819 {
1820 	return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
1821 }
1822 
1823 uint32_t
pkt_mcopypkt_sum(mbuf_t m,int soff,kern_packet_t dph,uint16_t doff,uint16_t len,boolean_t do_cscum)1824 pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
1825     uint16_t len, boolean_t do_cscum)
1826 {
1827 	return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
1828 }
1829 
/*
 * Exported wrapper around the inlined _pkt_copy().
 *
 * Fixed: the original used "return _pkt_copy(...);" — returning a void
 * expression from a void function is a GNU/C++ extension, not ISO C
 * (rejected under -pedantic-errors); call and fall off the end instead.
 */
void
pkt_copy(void *src, void *dst, size_t len)
{
	_pkt_copy(src, dst, len);
}
1835