xref: /xnu-11215.1.10/bsd/netinet/in_cksum.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988, 1992, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
61  */
62 
#include <sys/param.h>
#include <machine/endian.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <net/dlil.h>
#include <netinet/in.h>
#define _IP_VHL
#include <netinet/ip.h>
#include <netinet/ip_var.h>
72 
/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */
/*
 * Fold the 64-bit accumulator "sum" down to 16 bits: add the four 16-bit
 * words of the 64-bit value (via q_util), then the two 16-bit halves of
 * that 32-bit partial sum (via l_util), and fold the final end-around
 * carry with ADDCARRY().  Requires locals named "sum", "q_util" and
 * "l_util" in the enclosing scope.
 */
#define REDUCE16 {                                                        \
	q_util.q = sum;                                                   \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];                                  \
	ADDCARRY(sum);                                                    \
}

/* View a 32-bit value as two 16-bit halves (used by REDUCE16). */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* View a 64-bit value as 16-bit words or 32-bit halves (used by REDUCE16). */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};

/* Machine-dependent contiguous-buffer 16-bit sum; see b_sum16() below. */
extern uint32_t os_cpu_in_cksum(const void *__sized_by(len), uint32_t len, uint32_t);
98 
99 /*
100  * Perform 16-bit 1's complement sum on a contiguous span.
101  */
102 uint16_t
b_sum16(const void * __sized_by (len)buf,int len)103 b_sum16(const void *__sized_by(len) buf, int len)
104 {
105 	return (uint16_t)os_cpu_in_cksum(buf, len, 0);
106 }
107 
108 uint16_t inet_cksum_simple(struct mbuf *, int);
109 /*
110  * For the exported _in_cksum symbol in BSDKernel symbol set.
111  */
112 uint16_t
inet_cksum_simple(struct mbuf * m,int len)113 inet_cksum_simple(struct mbuf *m, int len)
114 {
115 	return inet_cksum(m, 0, 0, len);
116 }
117 
/*
 * 16-bit 1's complement addition of two half-words (RFC 1071): the
 * carry out of bit 15 is folded back into the low 16 bits.
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	/*
	 * The sum of two 16-bit values is at most 0x1fffe, so a single
	 * end-around carry fold suffices (equivalent to ADDCARRY()).
	 */
	uint64_t sum = (uint64_t)a + b;

	if (sum > 0xffff) {
		sum -= 0xffff;
	}
	return (uint16_t)sum;
}
126 
/*
 * 16-bit 1's complement sum of the IPv4 pseudo-header fields (source
 * address, destination address, and the length+protocol word) for use
 * in transport checksums.  Returns the sum itself, not its complement.
 */
uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	/* A 64-bit accumulator cannot overflow on three 32-bit inputs. */
	uint64_t sum = (uint64_t)a + b + c;
	uint32_t fold;

	/*
	 * Fold the four 16-bit words of the accumulator, then the two
	 * halves of the 32-bit partial sum, then the final end-around
	 * carry.  Equivalent to the REDUCE16 macro, but expressed as
	 * endian-independent arithmetic instead of union type-punning.
	 */
	fold = (uint32_t)((sum & 0xffff) + ((sum >> 16) & 0xffff) +
	    ((sum >> 32) & 0xffff) + (sum >> 48));
	fold = (fold & 0xffff) + (fold >> 16);
	if (fold > 0xffff) {
		fold -= 0xffff;
	}
	return (uint16_t)fold;
}
138 
/*
 * 64-bit variant of in_pseudo(): 16-bit 1's complement sum of three
 * 64-bit quantities.  Carries out of the 64-bit addition wrap and are
 * discarded, matching the historical behavior.  Returns the sum, not
 * its complement.
 */
uint16_t
in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t sum = a + b + c;
	uint32_t fold;

	/*
	 * Fold the four 16-bit words, then the two 16-bit halves, then
	 * the final end-around carry; equivalent to REDUCE16 without
	 * union type-punning, and endian-independent.
	 */
	fold = (uint32_t)((sum & 0xffff) + ((sum >> 16) & 0xffff) +
	    ((sum >> 32) & 0xffff) + (sum >> 48));
	fold = (fold & 0xffff) + (fold >> 16);
	if (fold > 0xffff) {
		fold -= 0xffff;
	}
	return (uint16_t)fold;
}
150 
151 /*
152  * May be used on IP header with options.
153  */
154 uint16_t
in_cksum_hdr_opt(const struct ip * ip)155 in_cksum_hdr_opt(const struct ip *ip)
156 {
157 	int hdrlen;
158 	const uint8_t *hdr;
159 
160 	hdrlen = IP_VHL_HL(ip->ip_vhl) << 2;
161 	hdr = __unsafe_forge_bidi_indexable(const uint8_t *, ip, hdrlen);
162 	return ~b_sum16(hdr, hdrlen) & 0xffff;
163 }
164 
165 /*
166  * A wrapper around the simple in_cksum_hdr() and the more complicated
167  * inet_cksum(); the former is chosen if the IP header is simple,
168  * contiguous and 32-bit aligned.  Also does some stats accounting.
169  */
170 uint16_t
ip_cksum_hdr_dir(struct mbuf * m,uint32_t hlen,int out)171 ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
172 {
173 	struct ip *ip = mtod(m, struct ip *);
174 
175 	if (out) {
176 		ipstat.ips_snd_swcsum++;
177 		ipstat.ips_snd_swcsum_bytes += hlen;
178 	} else {
179 		ipstat.ips_rcv_swcsum++;
180 		ipstat.ips_rcv_swcsum_bytes += hlen;
181 	}
182 
183 	if (hlen == sizeof(*ip) &&
184 	    m->m_len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
185 		return in_cksum_hdr(ip);
186 	}
187 
188 	return inet_cksum(m, 0, 0, hlen);
189 }
190 
191 uint16_t
ip_cksum_hdr_dir_buffer(const void * __sized_by (len)buffer,uint32_t hlen,uint32_t len,int out)192 ip_cksum_hdr_dir_buffer(const void *__sized_by(len) buffer, uint32_t hlen, uint32_t len,
193     int out)
194 {
195 	const struct ip *ip = buffer;
196 
197 	if (out) {
198 		ipstat.ips_snd_swcsum++;
199 		ipstat.ips_snd_swcsum_bytes += hlen;
200 	} else {
201 		ipstat.ips_rcv_swcsum++;
202 		ipstat.ips_rcv_swcsum_bytes += hlen;
203 	}
204 
205 	if (hlen == sizeof(*ip) &&
206 	    len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
207 		return in_cksum_hdr(ip);
208 	}
209 
210 	return inet_cksum_buffer(buffer, 0, 0, hlen);
211 }
212 
213 /*
214  * m MUST contain at least an IP header, if nxt is specified;
215  * nxt is the upper layer protocol number;
216  * off is an offset where TCP/UDP/ICMP header starts;
217  * len is a total length of a transport segment (e.g. TCP header + TCP payload)
218  */
219 uint16_t
inet_cksum(struct mbuf * m,uint32_t nxt,uint32_t off,uint32_t len)220 inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
221 {
222 	uint32_t sum;
223 
224 	sum = m_sum16(m, off, len);
225 
226 	/* include pseudo header checksum? */
227 	if (nxt != 0) {
228 		struct ip *ip;
229 		unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
230 		uint32_t mlen;
231 
232 		/*
233 		 * Sanity check
234 		 *
235 		 * Use m_length2() instead of m_length(), as we cannot rely on
236 		 * the caller setting m_pkthdr.len correctly, if the mbuf is
237 		 * a M_PKTHDR one.
238 		 */
239 		if ((mlen = m_length2(m, NULL)) < sizeof(*ip)) {
240 			panic("%s: mbuf %p too short (%d) for IPv4 header",
241 			    __func__, m, mlen);
242 			/* NOTREACHED */
243 		}
244 
245 		/*
246 		 * In case the IP header is not contiguous, or not 32-bit
247 		 * aligned, copy it to a local buffer.  Note here that we
248 		 * expect the data pointer to point to the IP header.
249 		 */
250 		if ((sizeof(*ip) > m->m_len) ||
251 		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
252 			m_copydata(m, 0, sizeof(*ip), (caddr_t)buf);
253 			ip = (struct ip *)(void *)buf;
254 		} else {
255 			ip = (struct ip *)(void *)(m_mtod_current(m));
256 		}
257 
258 		/* add pseudo header checksum */
259 		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
260 		    htonl(len + nxt));
261 
262 		/* fold in carry bits */
263 		ADDCARRY(sum);
264 	}
265 
266 	return ~sum & 0xffff;
267 }
268 
269 /*
270  * buffer MUST contain at least an IP header, if nxt is specified;
271  * nxt is the upper layer protocol number;
272  * off is an offset where TCP/UDP/ICMP header starts;
273  * len is a total length of a transport segment (e.g. TCP header + TCP payload)
274  */
275 uint16_t
inet_cksum_buffer(const void * __sized_by (len)buffer,uint32_t nxt,uint32_t off,uint32_t len)276 inet_cksum_buffer(const void *__sized_by(len) buffer, uint32_t nxt, uint32_t off,
277     uint32_t len)
278 {
279 	uint32_t sum;
280 
281 	if (off >= len) {
282 		panic("%s: off (%d) >= len (%d)", __func__, off, len);
283 	}
284 
285 	sum = b_sum16(&((const uint8_t *)buffer)[off], len);
286 
287 	/* include pseudo header checksum? */
288 	if (nxt != 0) {
289 		const struct ip *ip;
290 		unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
291 
292 		/*
293 		 * In case the IP header is not contiguous, or not 32-bit
294 		 * aligned, copy it to a local buffer.  Note here that we
295 		 * expect the data pointer to point to the IP header.
296 		 */
297 		if (!IP_HDR_ALIGNED_P(buffer)) {
298 			memcpy(buf, buffer, sizeof(*ip));
299 			ip = (const struct ip *)(const void *)buf;
300 		} else {
301 			ip = (const struct ip *)buffer;
302 		}
303 
304 		/* add pseudo header checksum */
305 		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
306 		    htonl(len + nxt));
307 
308 		/* fold in carry bits */
309 		ADDCARRY(sum);
310 	}
311 
312 	return ~sum & 0xffff;
313 }
314 
315 #if DEBUG || DEVELOPMENT
316 #include <pexpert/pexpert.h>
317 
318 #define CKSUM_ERR kprintf
319 
/*
 * The following routines implement the portable, reference implementation
 * of os_cpu_in_cksum_mbuf().  This is currently used only for validating
 * the correctness of the platform-specific implementation, at boot time
 * in dlil_verify_sum16().  It returns the 32-bit accumulator without doing
 * a 1's complement on it.
 */
327 #if !defined(__LP64__)
/* 32-bit version */
/*
 * Portable reference mbuf-chain checksum: skip "off" bytes into chain
 * "m", sum "len" bytes in 16-bit 1's complement arithmetic seeded with
 * "initial_sum", and return the folded accumulator without the final
 * 1's complement (see the block comment above).  Returns (uint32_t)-1
 * if the chain runs out of data.
 */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* Pre-fold the 32-bit seed so later additions have carry headroom. */
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/* Walk the chain to the mbuf that contains offset "off". */
	for (;;) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		if (mlen > off) {
			/* Offset lands inside this mbuf; start summing here. */
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->m_next;
	}

	/* Sum each mbuf's data until "len" bytes have been consumed. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * When this segment starts at an odd offset in the checksum
		 * stream, "partial" must be byte-swapped before folding.
		 */
		needs_swap = started_on_odd;
		/* Unrolled main loop: sixteen 16-bit words per iteration. */
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/*
			 * Fold early when "partial" gets close to
			 * overflowing; any byte swap must precede the fold.
			 */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
			/* Trailing odd byte flips parity for the next mbuf. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* Final fold of the accumulator down to 16 bits. */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
477 
478 #else /* __LP64__ */
/* 64-bit version */
/*
 * Portable reference mbuf-chain checksum (64-bit accumulator variant):
 * skip "off" bytes into chain "m", sum "len" bytes in 1's complement
 * arithmetic seeded with "initial_sum", and return the folded
 * accumulator without the final 1's complement (see the block comment
 * above).  Returns (uint32_t)-1 if the chain runs out of data.
 */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	/* Walk the chain to the mbuf that contains offset "off". */
	for (;;) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		if (mlen > off) {
			/* Offset lands inside this mbuf; start summing here. */
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->m_next;
	}

	/* Sum each mbuf's data until "len" bytes have been consumed. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * When this segment starts at an odd offset in the checksum
		 * stream, "partial" must be byte-swapped before folding.
		 */
		needs_swap = started_on_odd;
		/* Reach 4-byte alignment for the 32-bit loads below. */
		if ((uintptr_t)data & 2) {
			if (mlen < 2) {
				goto trailing_bytes;
			}
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		/* Unrolled main loop: sixteen 32-bit words per iteration. */
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/*
			 * Fold early when "partial" gets close to
			 * overflowing; any byte swap must precede the fold.
			 */
			if (__improbable(partial & (3ULL << 62))) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
			/* Trailing odd byte flips parity for the next mbuf. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 56);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/* Final fold of the 64-bit accumulator down to 16 bits. */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
643 #endif /* __LP64 */
644 #endif /* DEBUG || DEVELOPMENT */
645