/*
 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * http://code.google.com/p/smhasher/
 *
 * Copyright (c) 2009-2011 Austin Appleby.
 *
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.
 */

/*
 * http://burtleburtle.net/bob/hash/
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * You can use this free for any purpose.  It's in the public domain.
 * It has no warranty.
 */

#include <stdbool.h>
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>
#include <os/base.h>

static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);

#define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)

#define ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
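
/*
 * These are rotates, not shifts; bits shifted out at the top re-enter
 * at the bottom, e.g. ROTL32(0x80000001, 1) == 0x00000003.  The
 * expansion assumes 0 < r < width, which holds for every rotation
 * constant used below (rotating by 0 or by the full width would shift
 * by the type width, which C leaves undefined).
 */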

/*
 * The following hash algorithms are selected based on performance:
 *
 * 64-bit:	MurmurHash3_x64_128
 * 32-bit:	JHash
 */
#if defined(__LP64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__LP64__ */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__LP64__ */
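
/*
 * Illustrative usage (not from the original sources): callers hash an
 * arbitrary byte buffer through the per-platform default entry point.
 * The flow tuple layout and "seed" below are hypothetical:
 *
 *	struct ft { u_int32_t src, dst; u_int16_t sport, dport; } ft;
 *	u_int32_t h = net_flowhash(&ft, sizeof (ft), seed);
 *
 * All three implementations share the net_flowhash_fn_t signature, so
 * code bound to the function pointer is agnostic to which one runs.
 */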
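
/*
 * i386, x86_64 and arm64 all support unaligned scalar loads, so their
 * getblock{32,64}() can index the block array directly.  The fallback
 * below assembles the value a byte at a time whenever the base pointer
 * is not suitably aligned.
 */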
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	return p[i];
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	return p[i];
}
#else /* !__i386__ && !__x86_64__ && !__arm64__ */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int32_t value;

	if (ALIGNED32(p)) {
		value = p[i];
	} else {
#if BYTE_ORDER == BIG_ENDIAN
		value =
		    (((u_int32_t)bytes[0]) << 24) |
		    (((u_int32_t)bytes[1]) << 16) |
		    (((u_int32_t)bytes[2]) << 8) |
		    ((u_int32_t)bytes[3]);
#else /* LITTLE_ENDIAN */
		value =
		    (((u_int32_t)bytes[3]) << 24) |
		    (((u_int32_t)bytes[2]) << 16) |
		    (((u_int32_t)bytes[1]) << 8) |
		    ((u_int32_t)bytes[0]);
#endif /* LITTLE_ENDIAN */
	}
	return value;
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int64_t value;

	if (ALIGNED64(p)) {
		value = p[i];
	} else {
#if BYTE_ORDER == BIG_ENDIAN
		value =
		    (((u_int64_t)bytes[0]) << 56) |
		    (((u_int64_t)bytes[1]) << 48) |
		    (((u_int64_t)bytes[2]) << 40) |
		    (((u_int64_t)bytes[3]) << 32) |
		    (((u_int64_t)bytes[4]) << 24) |
		    (((u_int64_t)bytes[5]) << 16) |
		    (((u_int64_t)bytes[6]) << 8) |
		    ((u_int64_t)bytes[7]);
#else /* LITTLE_ENDIAN */
		value =
		    (((u_int64_t)bytes[7]) << 56) |
		    (((u_int64_t)bytes[6]) << 48) |
		    (((u_int64_t)bytes[5]) << 40) |
		    (((u_int64_t)bytes[4]) << 32) |
		    (((u_int64_t)bytes[3]) << 24) |
		    (((u_int64_t)bytes[2]) << 16) |
		    (((u_int64_t)bytes[1]) << 8) |
		    ((u_int64_t)bytes[0]);
#endif /* LITTLE_ENDIAN */
	}
	return value;
}
#endif /* !__i386__ && !__x86_64__ && !__arm64__ */

static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}

static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdLLU;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53LLU;
	k ^= k >> 33;

	return k;
}
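
/*
 * The fmix routines are MurmurHash3's finalization mixers: alternating
 * xor-shifts and multiplications that avalanche the accumulated state
 * so every input bit can affect every output bit.  One observable
 * property of this construction: an all-zero input maps to zero, since
 * xor, shift and multiply all preserve zero.
 */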

/*
 * MurmurHash3_x86_32
 */
#define MH3_X86_32_C1   0xcc9e2d51
#define MH3_X86_32_C2   0x1b873593

u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 4;
	const u_int32_t *blocks;
	const u_int8_t *tail;
	u_int32_t h1 = seed, k1;
	int i;

	/* body */
	blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);

	for (i = -nblocks; i; i++) {
		k1 = getblock32(blocks, i);

		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;

		h1 ^= k1;
		h1 = ROTL32(h1, 13);
		h1 = h1 * 5 + 0xe6546b64;
	}

	/* tail */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
	k1 = 0;

	switch (len & 3) {
	case 3:
		k1 ^= tail[2] << 16;
		OS_FALLTHROUGH;
	case 2:
		k1 ^= tail[1] << 8;
		OS_FALLTHROUGH;
	case 1:
		k1 ^= tail[0];
		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;
		h1 ^= k1;
	}

	/* finalization */
	h1 ^= len;

	h1 = mh3_fmix32(h1);

	return h1;
}
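
/*
 * Illustrative usage (not from the original sources): "buf" and "n"
 * are hypothetical caller state:
 *
 *	u_int32_t h = net_flowhash_mh3_x86_32(buf, n, 0);
 *
 * With len == 0 neither the block loop nor the tail switch runs, so
 * the result degenerates to mh3_fmix32(seed ^ 0) == mh3_fmix32(seed);
 * a zero-length key with seed 0 therefore hashes to 0.
 */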

/*
 * MurmurHash3_x64_128
 */
#define MH3_X64_128_C1  0x87c37b91114253d5LLU
#define MH3_X64_128_C2  0x4cf5ad432745937fLLU

u_int32_t
net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 16;
	const u_int64_t *blocks;
	const u_int8_t *tail;
	u_int64_t h1 = seed, k1;
	u_int64_t h2 = seed, k2;
	u_int32_t i;

	/* body */
	blocks = (const u_int64_t *)(const void *)data;

	for (i = 0; i < nblocks; i++) {
		k1 = getblock64(blocks, i * 2 + 0);
		k2 = getblock64(blocks, i * 2 + 1);

		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol   $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;

#if defined(__x86_64__)
		__asm__ ( "rol   $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
		h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
		h1 += h2;
		h1 = h1 * 5 + 0x52dce729;

		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol   $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;

#if defined(__x86_64__)
		__asm__ ( "rol   $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
		h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
		h2 += h1;
		h2 = h2 * 5 + 0x38495ab5;
	}

	/* tail */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
	k1 = 0;
	k2 = 0;

	switch (len & 15) {
	case 15:
		k2 ^= ((u_int64_t)tail[14]) << 48;
		OS_FALLTHROUGH;
	case 14:
		k2 ^= ((u_int64_t)tail[13]) << 40;
		OS_FALLTHROUGH;
	case 13:
		k2 ^= ((u_int64_t)tail[12]) << 32;
		OS_FALLTHROUGH;
	case 12:
		k2 ^= ((u_int64_t)tail[11]) << 24;
		OS_FALLTHROUGH;
	case 11:
		k2 ^= ((u_int64_t)tail[10]) << 16;
		OS_FALLTHROUGH;
	case 10:
		k2 ^= ((u_int64_t)tail[9]) << 8;
		OS_FALLTHROUGH;
	case 9:
		k2 ^= ((u_int64_t)tail[8]) << 0;
		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol   $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;
		OS_FALLTHROUGH;
	case 8:
		k1 ^= ((u_int64_t)tail[7]) << 56;
		OS_FALLTHROUGH;
	case 7:
		k1 ^= ((u_int64_t)tail[6]) << 48;
		OS_FALLTHROUGH;
	case 6:
		k1 ^= ((u_int64_t)tail[5]) << 40;
		OS_FALLTHROUGH;
	case 5:
		k1 ^= ((u_int64_t)tail[4]) << 32;
		OS_FALLTHROUGH;
	case 4:
		k1 ^= ((u_int64_t)tail[3]) << 24;
		OS_FALLTHROUGH;
	case 3:
		k1 ^= ((u_int64_t)tail[2]) << 16;
		OS_FALLTHROUGH;
	case 2:
		k1 ^= ((u_int64_t)tail[1]) << 8;
		OS_FALLTHROUGH;
	case 1:
		k1 ^= ((u_int64_t)tail[0]) << 0;
		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol   $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;
	}

	/* finalization */
	h1 ^= len;
	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = mh3_fmix64(h1);
	h2 = mh3_fmix64(h2);

	h1 += h2;
	h2 += h1;

	/* throw away all but the lowest 32 bits */
	return h1 & 0xffffffff;
}
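
/*
 * Note: (h1, h2) holds the full 128-bit MurmurHash3_x64_128 digest at
 * the end of finalization; the flow-hash interface needs only 32 bits,
 * so "h1 & 0xffffffff" (equivalent to a cast to u_int32_t) keeps the
 * low word of h1.  The inline assembly above emits a single rol/ror
 * instruction on x86_64 and arm64; ROTL64() is the portable fallback
 * with identical semantics, since a right rotate by (64 - r) equals a
 * left rotate by r.
 */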

#define JHASH_INIT      0xdeadbeef

#define JHASH_MIX(a, b, c) {                    \
	a -= c;  a ^= ROTL32(c, 4);   c += b;   \
	b -= a;  b ^= ROTL32(a, 6);   a += c;   \
	c -= b;  c ^= ROTL32(b, 8);   b += a;   \
	a -= c;  a ^= ROTL32(c, 16);  c += b;   \
	b -= a;  b ^= ROTL32(a, 19);  a += c;   \
	c -= b;  c ^= ROTL32(b, 4);   b += a;   \
}

#define JHASH_FINAL(a, b, c) {                  \
	c ^= b;  c -= ROTL32(b, 14);            \
	a ^= c;  a -= ROTL32(c, 11);            \
	b ^= a;  b -= ROTL32(a, 25);            \
	c ^= b;  c -= ROTL32(b, 16);            \
	a ^= c;  a -= ROTL32(c, 4);             \
	b ^= a;  b -= ROTL32(a, 14);            \
	c ^= b;  c -= ROTL32(b, 24);            \
}
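
/*
 * As documented in Bob Jenkins' lookup3.c: the mix step is reversible,
 * so it preserves the entropy gathered from earlier blocks while
 * spreading each 12-byte block across (a, b, c); the final step is a
 * one-way avalanche that mixes the last block's bits into c, which is
 * the value the hash returns.
 */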

#if BYTE_ORDER == BIG_ENDIAN
/*
 * hashbig()
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

	if (ALIGNED32(key)) {
		/* read 32-bit chunks */
		const u_int32_t *k = (const u_int32_t *)key;

		/*
		 * all but the last block:
		 * aligned reads, each affecting 32 bits of (a,b,c)
		 */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] & 0xffffff00" actually reads beyond the end of
		 * the string, but then masks off the part it's not
		 * allowed to read.  Because the string is aligned, the
		 * out-of-bounds read is in the same word as the rest of
		 * the string.  The masking trick does make the hash
		 * noticeably faster for short strings (like English
		 * words).
		 */
		switch (len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;

		case 11:
			c += k[2] & 0xffffff00;
			b += k[1];
			a += k[0];
			break;

		case 10:
			c += k[2] & 0xffff0000;
			b += k[1];
			a += k[0];
			break;

		case 9:
			c += k[2] & 0xff000000;
			b += k[1];
			a += k[0];
			break;

		case 8:
			b += k[1];
			a += k[0];
			break;

		case 7:
			b += k[1] & 0xffffff00;
			a += k[0];
			break;

		case 6:
			b += k[1] & 0xffff0000;
			a += k[0];
			break;

		case 5:
			b += k[1] & 0xff000000;
			a += k[0];
			break;

		case 4:
			a += k[0];
			break;

		case 3:
			a += k[0] & 0xffffff00;
			break;

		case 2:
			a += k[0] & 0xffff0000;
			break;

		case 1:
			a += k[0] & 0xff000000;
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}

	/* need to read the key one byte at a time */
	const u_int8_t *k = (const u_int8_t *)key;

	/* all but the last block: affect some 32 bits of (a,b,c) */
	while (len > 12) {
		a += ((u_int32_t)k[0]) << 24;
		a += ((u_int32_t)k[1]) << 16;
		a += ((u_int32_t)k[2]) << 8;
		a += ((u_int32_t)k[3]);
		b += ((u_int32_t)k[4]) << 24;
		b += ((u_int32_t)k[5]) << 16;
		b += ((u_int32_t)k[6]) << 8;
		b += ((u_int32_t)k[7]);
		c += ((u_int32_t)k[8]) << 24;
		c += ((u_int32_t)k[9]) << 16;
		c += ((u_int32_t)k[10]) << 8;
		c += ((u_int32_t)k[11]);
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 12;
	}

	/* last block: affect all 32 bits of (c) */
	switch (len) {
	case 12:
		c += k[11];
		OS_FALLTHROUGH;
	case 11:
		c += ((u_int32_t)k[10]) << 8;
		OS_FALLTHROUGH;
	case 10:
		c += ((u_int32_t)k[9]) << 16;
		OS_FALLTHROUGH;
	case 9:
		c += ((u_int32_t)k[8]) << 24;
		OS_FALLTHROUGH;
	case 8:
		b += k[7];
		OS_FALLTHROUGH;
	case 7:
		b += ((u_int32_t)k[6]) << 8;
		OS_FALLTHROUGH;
	case 6:
		b += ((u_int32_t)k[5]) << 16;
		OS_FALLTHROUGH;
	case 5:
		b += ((u_int32_t)k[4]) << 24;
		OS_FALLTHROUGH;
	case 4:
		a += k[3];
		OS_FALLTHROUGH;
	case 3:
		a += ((u_int32_t)k[2]) << 8;
		OS_FALLTHROUGH;
	case 2:
		a += ((u_int32_t)k[1]) << 16;
		OS_FALLTHROUGH;
	case 1:
		a += ((u_int32_t)k[0]) << 24;
		break;

	case 0:
		/* zero length requires no mixing */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
}
#else /* LITTLE_ENDIAN */
/*
 * hashlittle()
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * On i386/x86_64 it is faster to read 32-bit chunks if the key
	 * is 32-bit aligned or not even 16-bit aligned; 16-bit reads
	 * are used only when the key is exactly 16-bit aligned.
	 */
	if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
	if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* read 32-bit chunks */
		const u_int32_t *k = (const u_int32_t *)key;

		/*
		 * all but the last block:
		 * aligned reads, each affecting 32 bits of (a,b,c)
		 */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] & 0xffffff" actually reads beyond the end of the
		 * string, but then masks off the part it's not allowed
		 * to read.  Because the string is aligned, the masked-off
		 * tail is in the same word as the rest of the string.
		 * The masking trick does make the hash noticeably faster
		 * for short strings (like English words).
		 */
		switch (len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;

		case 11:
			c += k[2] & 0xffffff;
			b += k[1];
			a += k[0];
			break;

		case 10:
			c += k[2] & 0xffff;
			b += k[1];
			a += k[0];
			break;

		case 9:
			c += k[2] & 0xff;
			b += k[1];
			a += k[0];
			break;

		case 8:
			b += k[1];
			a += k[0];
			break;

		case 7:
			b += k[1] & 0xffffff;
			a += k[0];
			break;

		case 6:
			b += k[1] & 0xffff;
			a += k[0];
			break;

		case 5:
			b += k[1] & 0xff;
			a += k[0];
			break;

		case 4:
			a += k[0];
			break;

		case 3:
			a += k[0] & 0xffffff;
			break;

		case 2:
			a += k[0] & 0xffff;
			break;

		case 1:
			a += k[0] & 0xff;
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}
#if !defined(__i386__) && !defined(__x86_64__)
	else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
	/* read 16-bit chunks */
	const u_int16_t *k = (const u_int16_t *)key;
	const u_int8_t *k8;

	/* all but last block: aligned reads and different mixing */
	while (len > 12) {
		a += k[0] + (((u_int32_t)k[1]) << 16);
		b += k[2] + (((u_int32_t)k[3]) << 16);
		c += k[4] + (((u_int32_t)k[5]) << 16);
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 6;
	}

	/* handle the last (probably partial) block */
	k8 = (const u_int8_t *)k;
	switch (len) {
	case 12:
		c += k[4] + (((u_int32_t)k[5]) << 16);
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 11:
		c += ((u_int32_t)k8[10]) << 16;
		OS_FALLTHROUGH;
	case 10:
		c += k[4];
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 9:
		c += k8[8];
		OS_FALLTHROUGH;
	case 8:
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 7:
		b += ((u_int32_t)k8[6]) << 16;
		OS_FALLTHROUGH;
	case 6:
		b += k[2];
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 5:
		b += k8[4];
		OS_FALLTHROUGH;
	case 4:
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 3:
		a += ((u_int32_t)k8[2]) << 16;
		OS_FALLTHROUGH;
	case 2:
		a += k[0];
		break;

	case 1:
		a += k8[0];
		break;

	case 0:
		/* zero length requires no mixing */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
#if !defined(__i386__) && !defined(__x86_64__)
}

/* need to read the key one byte at a time */
const u_int8_t *k = (const u_int8_t *)key;

/* all but the last block: affect some 32 bits of (a,b,c) */
while (len > 12) {
	a += k[0];
	a += ((u_int32_t)k[1]) << 8;
	a += ((u_int32_t)k[2]) << 16;
	a += ((u_int32_t)k[3]) << 24;
	b += k[4];
	b += ((u_int32_t)k[5]) << 8;
	b += ((u_int32_t)k[6]) << 16;
	b += ((u_int32_t)k[7]) << 24;
	c += k[8];
	c += ((u_int32_t)k[9]) << 8;
	c += ((u_int32_t)k[10]) << 16;
	c += ((u_int32_t)k[11]) << 24;
	JHASH_MIX(a, b, c);
	len -= 12;
	k += 12;
}

/* last block: affect all 32 bits of (c) */
switch (len) {
case 12:
	c += ((u_int32_t)k[11]) << 24;
	OS_FALLTHROUGH;
case 11:
	c += ((u_int32_t)k[10]) << 16;
	OS_FALLTHROUGH;
case 10:
	c += ((u_int32_t)k[9]) << 8;
	OS_FALLTHROUGH;
case 9:
	c += k[8];
	OS_FALLTHROUGH;
case 8:
	b += ((u_int32_t)k[7]) << 24;
	OS_FALLTHROUGH;
case 7:
	b += ((u_int32_t)k[6]) << 16;
	OS_FALLTHROUGH;
case 6:
	b += ((u_int32_t)k[5]) << 8;
	OS_FALLTHROUGH;
case 5:
	b += k[4];
	OS_FALLTHROUGH;
case 4:
	a += ((u_int32_t)k[3]) << 24;
	OS_FALLTHROUGH;
case 3:
	a += ((u_int32_t)k[2]) << 16;
	OS_FALLTHROUGH;
case 2:
	a += ((u_int32_t)k[1]) << 8;
	OS_FALLTHROUGH;
case 1:
	a += k[0];
	break;

case 0:
	/* zero length requires no mixing */
	return c;
}

JHASH_FINAL(a, b, c);

return c;
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif /* LITTLE_ENDIAN */
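
/*
 * Illustrative usage (not from the original sources): "pkt" and "plen"
 * are hypothetical caller state:
 *
 *	u_int32_t h = net_flowhash_jhash(pkt, plen, 0);
 *
 * The 32-bit, 16-bit and byte-at-a-time paths above read the same key
 * bytes in the same significance order, so a given key hashes to the
 * same value regardless of its alignment in memory.
 */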