xref: /xnu-10002.61.3/bsd/netinet/in_cksum.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988, 1992, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
61  */
62 
63 #include <sys/param.h>
64 #include <machine/endian.h>
65 #include <sys/mbuf.h>
66 #include <kern/debug.h>
67 #include <net/dlil.h>
68 #include <netinet/in.h>
69 #define _IP_VHL
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 
73 /*
74  * Checksum routine for Internet Protocol family headers (Portable Version).
75  *
76  * This routine is very heavily used in the network
77  * code and should be modified for each CPU to be as fast as possible.
78  */
/*
 * REDUCE16: fold the 64-bit accumulator `sum' down towards 16 bits.
 *
 * The macro works on three locals that the expanding function must
 * declare under exactly these names:
 *	sum	- the accumulator being reduced (read and written)
 *	q_util	- a `union q_util' used as scratch
 *	l_util	- a `union l_util' used as scratch
 *
 * The four 16-bit lanes of `sum' are added into a 32-bit value, the two
 * 16-bit lanes of that value are added back into `sum', and ADDCARRY()
 * is then applied to `sum'.
 *
 * The body is wrapped in do { } while (0) so that `REDUCE16;' expands to
 * exactly one statement and remains valid inside an unbraced if/else.
 */
#define REDUCE16 do {                                                     \
	q_util.q = sum;                                                   \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];                                  \
	ADDCARRY(sum);                                                    \
} while (0)
85 
/*
 * Overlay of one 32-bit value with its two 16-bit lanes; REDUCE16 stores
 * through `l' and reads the lanes back through `s'.
 */
union l_util {
	uint16_t        s[2];   /* the value viewed as two 16-bit lanes */
	uint32_t        l;      /* the value as a single 32-bit word */
};
90 
/*
 * Overlay of one 64-bit value with its 16-bit and 32-bit lanes; REDUCE16
 * stores through `q' and reads the four 16-bit lanes back through `s'.
 */
union q_util {
	uint16_t        s[4];   /* the value viewed as four 16-bit lanes */
	uint32_t        l[2];   /* the value viewed as two 32-bit lanes */
	uint64_t        q;      /* the value as a single 64-bit word */
};
96 
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);

/*
 * 16-bit 1's complement sum over one contiguous span.
 *
 * The `len' bytes at `buf' are handed to os_cpu_in_cksum() with an
 * initial sum of zero; its 32-bit result is truncated to 16 bits and
 * returned.
 */
uint16_t
b_sum16(const void *buf, int len)
{
	const uint32_t span_sum = os_cpu_in_cksum(buf, len, 0);

	return (uint16_t)span_sum;
}
107 
108 uint16_t inet_cksum_simple(struct mbuf *, int);
109 /*
110  * For the exported _in_cksum symbol in BSDKernel symbol set.
111  */
112 uint16_t
inet_cksum_simple(struct mbuf * m,int len)113 inet_cksum_simple(struct mbuf *m, int len)
114 {
115 	return inet_cksum(m, 0, 0, len);
116 }
117 
/*
 * Add the 16-bit words `a' and `b' in a wide accumulator, apply
 * ADDCARRY() to it (macro defined outside this file; presumably it folds
 * the carry back into the low 16 bits), and return the low 16 bits.
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint64_t total;

	total = a + b;
	ADDCARRY(total);

	return (uint16_t)total;
}
126 
127 uint16_t
in_pseudo(uint32_t a,uint32_t b,uint32_t c)128 in_pseudo(uint32_t a, uint32_t b, uint32_t c)
129 {
130 	uint64_t sum;
131 	union q_util q_util;
132 	union l_util l_util;
133 
134 	sum = (uint64_t)a + b + c;
135 	REDUCE16;
136 	return (uint16_t)sum;
137 }
138 
139 uint16_t
in_pseudo64(uint64_t a,uint64_t b,uint64_t c)140 in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
141 {
142 	uint64_t sum;
143 	union q_util q_util;
144 	union l_util l_util;
145 
146 	sum = a + b + c;
147 	REDUCE16;
148 	return (uint16_t)sum;
149 }
150 
151 /*
152  * May be used on IP header with options.
153  */
154 uint16_t
in_cksum_hdr_opt(const struct ip * ip)155 in_cksum_hdr_opt(const struct ip *ip)
156 {
157 	return ~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff;
158 }
159 
160 /*
161  * A wrapper around the simple in_cksum_hdr() and the more complicated
162  * inet_cksum(); the former is chosen if the IP header is simple,
163  * contiguous and 32-bit aligned.  Also does some stats accounting.
164  */
165 uint16_t
ip_cksum_hdr_dir(struct mbuf * m,uint32_t hlen,int out)166 ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
167 {
168 	struct ip *ip = mtod(m, struct ip *);
169 
170 	if (out) {
171 		ipstat.ips_snd_swcsum++;
172 		ipstat.ips_snd_swcsum_bytes += hlen;
173 	} else {
174 		ipstat.ips_rcv_swcsum++;
175 		ipstat.ips_rcv_swcsum_bytes += hlen;
176 	}
177 
178 	if (hlen == sizeof(*ip) &&
179 	    m->m_len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
180 		return in_cksum_hdr(ip);
181 	}
182 
183 	return inet_cksum(m, 0, 0, hlen);
184 }
185 
186 uint16_t
ip_cksum_hdr_dir_buffer(const void * buffer,uint32_t hlen,uint32_t len,int out)187 ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len,
188     int out)
189 {
190 	const struct ip *ip = buffer;
191 
192 	if (out) {
193 		ipstat.ips_snd_swcsum++;
194 		ipstat.ips_snd_swcsum_bytes += hlen;
195 	} else {
196 		ipstat.ips_rcv_swcsum++;
197 		ipstat.ips_rcv_swcsum_bytes += hlen;
198 	}
199 
200 	if (hlen == sizeof(*ip) &&
201 	    len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
202 		return in_cksum_hdr(ip);
203 	}
204 
205 	return inet_cksum_buffer(buffer, 0, 0, hlen);
206 }
207 
208 /*
209  * m MUST contain at least an IP header, if nxt is specified;
210  * nxt is the upper layer protocol number;
211  * off is an offset where TCP/UDP/ICMP header starts;
212  * len is a total length of a transport segment (e.g. TCP header + TCP payload)
213  */
214 uint16_t
inet_cksum(struct mbuf * m,uint32_t nxt,uint32_t off,uint32_t len)215 inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
216 {
217 	uint32_t sum;
218 
219 	sum = m_sum16(m, off, len);
220 
221 	/* include pseudo header checksum? */
222 	if (nxt != 0) {
223 		struct ip *ip;
224 		unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
225 		uint32_t mlen;
226 
227 		/*
228 		 * Sanity check
229 		 *
230 		 * Use m_length2() instead of m_length(), as we cannot rely on
231 		 * the caller setting m_pkthdr.len correctly, if the mbuf is
232 		 * a M_PKTHDR one.
233 		 */
234 		if ((mlen = m_length2(m, NULL)) < sizeof(*ip)) {
235 			panic("%s: mbuf %p too short (%d) for IPv4 header",
236 			    __func__, m, mlen);
237 			/* NOTREACHED */
238 		}
239 
240 		/*
241 		 * In case the IP header is not contiguous, or not 32-bit
242 		 * aligned, copy it to a local buffer.  Note here that we
243 		 * expect the data pointer to point to the IP header.
244 		 */
245 		if ((sizeof(*ip) > m->m_len) ||
246 		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
247 			m_copydata(m, 0, sizeof(*ip), (caddr_t)buf);
248 			ip = (struct ip *)(void *)buf;
249 		} else {
250 			ip = (struct ip *)(void *)(m->m_data);
251 		}
252 
253 		/* add pseudo header checksum */
254 		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
255 		    htonl(len + nxt));
256 
257 		/* fold in carry bits */
258 		ADDCARRY(sum);
259 	}
260 
261 	return ~sum & 0xffff;
262 }
263 
264 /*
265  * buffer MUST contain at least an IP header, if nxt is specified;
266  * nxt is the upper layer protocol number;
267  * off is an offset where TCP/UDP/ICMP header starts;
268  * len is a total length of a transport segment (e.g. TCP header + TCP payload)
269  */
270 uint16_t
inet_cksum_buffer(const void * buffer,uint32_t nxt,uint32_t off,uint32_t len)271 inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off,
272     uint32_t len)
273 {
274 	uint32_t sum;
275 
276 	if (off >= len) {
277 		panic("%s: off (%d) >= len (%d)", __func__, off, len);
278 	}
279 
280 	sum = b_sum16(&((const uint8_t *)buffer)[off], len);
281 
282 	/* include pseudo header checksum? */
283 	if (nxt != 0) {
284 		const struct ip *ip;
285 		unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
286 
287 		/*
288 		 * In case the IP header is not contiguous, or not 32-bit
289 		 * aligned, copy it to a local buffer.  Note here that we
290 		 * expect the data pointer to point to the IP header.
291 		 */
292 		if (!IP_HDR_ALIGNED_P(buffer)) {
293 			memcpy(buf, buffer, sizeof(*ip));
294 			ip = (const struct ip *)(const void *)buf;
295 		} else {
296 			ip = (const struct ip *)buffer;
297 		}
298 
299 		/* add pseudo header checksum */
300 		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
301 		    htonl(len + nxt));
302 
303 		/* fold in carry bits */
304 		ADDCARRY(sum);
305 	}
306 
307 	return ~sum & 0xffff;
308 }
309 
310 #if DEBUG || DEVELOPMENT
311 #include <pexpert/pexpert.h>
312 
313 #define CKSUM_ERR kprintf
314 
315 /*
316  * The following routines implement the portable, reference implementation
317  * of os_cpu_in_cksum_mbuf().  This is currently used only for validating
318  * the correctness of the platform-specific implementation, at boot time
319  * in dlil_verify_sum16().  It returns the 32-bit accumulator without doing
320  * a 1's complement on it.
321  */
322 #if !defined(__LP64__)
323 /* 32-bit version */
324 uint32_t
in_cksum_mbuf_ref(struct mbuf * m,int len,int off,uint32_t initial_sum)325 in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
326 {
327 	int mlen;
328 	uint32_t sum, partial;
329 	unsigned int final_acc;
330 	uint8_t *data;
331 	boolean_t needs_swap, started_on_odd;
332 
333 	VERIFY(len >= 0);
334 	VERIFY(off >= 0);
335 
336 	needs_swap = FALSE;
337 	started_on_odd = FALSE;
338 	sum = (initial_sum >> 16) + (initial_sum & 0xffff);
339 
340 	for (;;) {
341 		if (__improbable(m == NULL)) {
342 			CKSUM_ERR("%s: out of data\n", __func__);
343 			return (uint32_t)-1;
344 		}
345 		mlen = m->m_len;
346 		if (mlen > off) {
347 			mlen -= off;
348 			data = mtod(m, uint8_t *) + off;
349 			goto post_initial_offset;
350 		}
351 		off -= mlen;
352 		if (len == 0) {
353 			break;
354 		}
355 		m = m->m_next;
356 	}
357 
358 	for (; len > 0; m = m->m_next) {
359 		if (__improbable(m == NULL)) {
360 			CKSUM_ERR("%s: out of data\n", __func__);
361 			return (uint32_t)-1;
362 		}
363 		mlen = m->m_len;
364 		data = mtod(m, uint8_t *);
365 post_initial_offset:
366 		if (mlen == 0) {
367 			continue;
368 		}
369 		if (mlen > len) {
370 			mlen = len;
371 		}
372 		len -= mlen;
373 
374 		partial = 0;
375 		if ((uintptr_t)data & 1) {
376 			/* Align on word boundary */
377 			started_on_odd = !started_on_odd;
378 #if BYTE_ORDER == LITTLE_ENDIAN
379 			partial = *data << 8;
380 #else /* BYTE_ORDER != LITTLE_ENDIAN */
381 			partial = *data;
382 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
383 			++data;
384 			--mlen;
385 		}
386 		needs_swap = started_on_odd;
387 		while (mlen >= 32) {
388 			__builtin_prefetch(data + 32);
389 			partial += *(uint16_t *)(void *)data;
390 			partial += *(uint16_t *)(void *)(data + 2);
391 			partial += *(uint16_t *)(void *)(data + 4);
392 			partial += *(uint16_t *)(void *)(data + 6);
393 			partial += *(uint16_t *)(void *)(data + 8);
394 			partial += *(uint16_t *)(void *)(data + 10);
395 			partial += *(uint16_t *)(void *)(data + 12);
396 			partial += *(uint16_t *)(void *)(data + 14);
397 			partial += *(uint16_t *)(void *)(data + 16);
398 			partial += *(uint16_t *)(void *)(data + 18);
399 			partial += *(uint16_t *)(void *)(data + 20);
400 			partial += *(uint16_t *)(void *)(data + 22);
401 			partial += *(uint16_t *)(void *)(data + 24);
402 			partial += *(uint16_t *)(void *)(data + 26);
403 			partial += *(uint16_t *)(void *)(data + 28);
404 			partial += *(uint16_t *)(void *)(data + 30);
405 			data += 32;
406 			mlen -= 32;
407 			if (__improbable(partial & 0xc0000000)) {
408 				if (needs_swap) {
409 					partial = (partial << 8) +
410 					    (partial >> 24);
411 				}
412 				sum += (partial >> 16);
413 				sum += (partial & 0xffff);
414 				partial = 0;
415 			}
416 		}
417 		if (mlen & 16) {
418 			partial += *(uint16_t *)(void *)data;
419 			partial += *(uint16_t *)(void *)(data + 2);
420 			partial += *(uint16_t *)(void *)(data + 4);
421 			partial += *(uint16_t *)(void *)(data + 6);
422 			partial += *(uint16_t *)(void *)(data + 8);
423 			partial += *(uint16_t *)(void *)(data + 10);
424 			partial += *(uint16_t *)(void *)(data + 12);
425 			partial += *(uint16_t *)(void *)(data + 14);
426 			data += 16;
427 			mlen -= 16;
428 		}
429 		/*
430 		 * mlen is not updated below as the remaining tests
431 		 * are using bit masks, which are not affected.
432 		 */
433 		if (mlen & 8) {
434 			partial += *(uint16_t *)(void *)data;
435 			partial += *(uint16_t *)(void *)(data + 2);
436 			partial += *(uint16_t *)(void *)(data + 4);
437 			partial += *(uint16_t *)(void *)(data + 6);
438 			data += 8;
439 		}
440 		if (mlen & 4) {
441 			partial += *(uint16_t *)(void *)data;
442 			partial += *(uint16_t *)(void *)(data + 2);
443 			data += 4;
444 		}
445 		if (mlen & 2) {
446 			partial += *(uint16_t *)(void *)data;
447 			data += 2;
448 		}
449 		if (mlen & 1) {
450 #if BYTE_ORDER == LITTLE_ENDIAN
451 			partial += *data;
452 #else /* BYTE_ORDER != LITTLE_ENDIAN */
453 			partial += *data << 8;
454 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
455 			started_on_odd = !started_on_odd;
456 		}
457 
458 		if (needs_swap) {
459 			partial = (partial << 8) + (partial >> 24);
460 		}
461 		sum += (partial >> 16) + (partial & 0xffff);
462 		/*
463 		 * Reduce sum to allow potential byte swap
464 		 * in the next iteration without carry.
465 		 */
466 		sum = (sum >> 16) + (sum & 0xffff);
467 	}
468 	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
469 	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
470 	return final_acc & 0xffff;
471 }
472 
473 #else /* __LP64__ */
474 /* 64-bit version */
475 uint32_t
in_cksum_mbuf_ref(struct mbuf * m,int len,int off,uint32_t initial_sum)476 in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
477 {
478 	int mlen;
479 	uint64_t sum, partial;
480 	unsigned int final_acc;
481 	uint8_t *data;
482 	boolean_t needs_swap, started_on_odd;
483 
484 	VERIFY(len >= 0);
485 	VERIFY(off >= 0);
486 
487 	needs_swap = FALSE;
488 	started_on_odd = FALSE;
489 	sum = initial_sum;
490 
491 	for (;;) {
492 		if (__improbable(m == NULL)) {
493 			CKSUM_ERR("%s: out of data\n", __func__);
494 			return (uint32_t)-1;
495 		}
496 		mlen = m->m_len;
497 		if (mlen > off) {
498 			mlen -= off;
499 			data = mtod(m, uint8_t *) + off;
500 			goto post_initial_offset;
501 		}
502 		off -= mlen;
503 		if (len == 0) {
504 			break;
505 		}
506 		m = m->m_next;
507 	}
508 
509 	for (; len > 0; m = m->m_next) {
510 		if (__improbable(m == NULL)) {
511 			CKSUM_ERR("%s: out of data\n", __func__);
512 			return (uint32_t)-1;
513 		}
514 		mlen = m->m_len;
515 		data = mtod(m, uint8_t *);
516 post_initial_offset:
517 		if (mlen == 0) {
518 			continue;
519 		}
520 		if (mlen > len) {
521 			mlen = len;
522 		}
523 		len -= mlen;
524 
525 		partial = 0;
526 		if ((uintptr_t)data & 1) {
527 			/* Align on word boundary */
528 			started_on_odd = !started_on_odd;
529 #if BYTE_ORDER == LITTLE_ENDIAN
530 			partial = *data << 8;
531 #else /* BYTE_ORDER != LITTLE_ENDIAN */
532 			partial = *data;
533 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
534 			++data;
535 			--mlen;
536 		}
537 		needs_swap = started_on_odd;
538 		if ((uintptr_t)data & 2) {
539 			if (mlen < 2) {
540 				goto trailing_bytes;
541 			}
542 			partial += *(uint16_t *)(void *)data;
543 			data += 2;
544 			mlen -= 2;
545 		}
546 		while (mlen >= 64) {
547 			__builtin_prefetch(data + 32);
548 			__builtin_prefetch(data + 64);
549 			partial += *(uint32_t *)(void *)data;
550 			partial += *(uint32_t *)(void *)(data + 4);
551 			partial += *(uint32_t *)(void *)(data + 8);
552 			partial += *(uint32_t *)(void *)(data + 12);
553 			partial += *(uint32_t *)(void *)(data + 16);
554 			partial += *(uint32_t *)(void *)(data + 20);
555 			partial += *(uint32_t *)(void *)(data + 24);
556 			partial += *(uint32_t *)(void *)(data + 28);
557 			partial += *(uint32_t *)(void *)(data + 32);
558 			partial += *(uint32_t *)(void *)(data + 36);
559 			partial += *(uint32_t *)(void *)(data + 40);
560 			partial += *(uint32_t *)(void *)(data + 44);
561 			partial += *(uint32_t *)(void *)(data + 48);
562 			partial += *(uint32_t *)(void *)(data + 52);
563 			partial += *(uint32_t *)(void *)(data + 56);
564 			partial += *(uint32_t *)(void *)(data + 60);
565 			data += 64;
566 			mlen -= 64;
567 			if (__improbable(partial & (3ULL << 62))) {
568 				if (needs_swap) {
569 					partial = (partial << 8) +
570 					    (partial >> 56);
571 				}
572 				sum += (partial >> 32);
573 				sum += (partial & 0xffffffff);
574 				partial = 0;
575 			}
576 		}
577 		/*
578 		 * mlen is not updated below as the remaining tests
579 		 * are using bit masks, which are not affected.
580 		 */
581 		if (mlen & 32) {
582 			partial += *(uint32_t *)(void *)data;
583 			partial += *(uint32_t *)(void *)(data + 4);
584 			partial += *(uint32_t *)(void *)(data + 8);
585 			partial += *(uint32_t *)(void *)(data + 12);
586 			partial += *(uint32_t *)(void *)(data + 16);
587 			partial += *(uint32_t *)(void *)(data + 20);
588 			partial += *(uint32_t *)(void *)(data + 24);
589 			partial += *(uint32_t *)(void *)(data + 28);
590 			data += 32;
591 		}
592 		if (mlen & 16) {
593 			partial += *(uint32_t *)(void *)data;
594 			partial += *(uint32_t *)(void *)(data + 4);
595 			partial += *(uint32_t *)(void *)(data + 8);
596 			partial += *(uint32_t *)(void *)(data + 12);
597 			data += 16;
598 		}
599 		if (mlen & 8) {
600 			partial += *(uint32_t *)(void *)data;
601 			partial += *(uint32_t *)(void *)(data + 4);
602 			data += 8;
603 		}
604 		if (mlen & 4) {
605 			partial += *(uint32_t *)(void *)data;
606 			data += 4;
607 		}
608 		if (mlen & 2) {
609 			partial += *(uint16_t *)(void *)data;
610 			data += 2;
611 		}
612 trailing_bytes:
613 		if (mlen & 1) {
614 #if BYTE_ORDER == LITTLE_ENDIAN
615 			partial += *data;
616 #else /* BYTE_ORDER != LITTLE_ENDIAN */
617 			partial += *data << 8;
618 #endif /* BYTE_ORDER != LITTLE_ENDIAN */
619 			started_on_odd = !started_on_odd;
620 		}
621 
622 		if (needs_swap) {
623 			partial = (partial << 8) + (partial >> 56);
624 		}
625 		sum += (partial >> 32) + (partial & 0xffffffff);
626 		/*
627 		 * Reduce sum to allow potential byte swap
628 		 * in the next iteration without carry.
629 		 */
630 		sum = (sum >> 32) + (sum & 0xffffffff);
631 	}
632 	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
633 	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
634 	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
635 	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
636 	return final_acc & 0xffff;
637 }
638 #endif /* __LP64 */
639 #endif /* DEBUG || DEVELOPMENT */
640