xref: /xnu-11417.140.69/bsd/net/flowhash.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2011-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * http://code.google.com/p/smhasher/
31  *
32  * Copyright (c) 2009-2011 Austin Appleby.
33  *
34  * MurmurHash3 was written by Austin Appleby, and is placed in the public
35  * domain. The author hereby disclaims copyright to this source code.
36  */
37 
38 /*
39  * http://burtleburtle.net/bob/hash/
40  *
41  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
42  *
43  * You can use this free for any purpose.  It's in the public domain.
44  * It has no warranty.
45  */
46 
47 #include <stdbool.h>
48 #include <sys/types.h>
49 #include <machine/endian.h>
50 #include <machine/trap.h>
51 #include <net/flowhash.h>
52 #include <os/base.h>
53 
/*
 * Internal helpers; two definitions of the getblock*() loaders exist below,
 * selected per-architecture (direct load vs. unaligned-safe byte assembly).
 */
static inline u_int32_t getblock32(const u_int32_t *__bidi_indexable, int);
static inline u_int64_t getblock64(const u_int64_t *__bidi_indexable, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);

/* True when the pointer value is 2-, 4- or 8-byte aligned, respectively. */
#define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)

/* Rotate x left by r bits; r must be in 1..31 (ROTL32) or 1..63 (ROTL64). */
#define ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
65 
/*
 * The following hash algorithms are selected based on performance:
 *
 * 64-bit:	MurmurHash3_x64_128
 * 32-bit:	JHash
 */
#if   defined(__LP64__)
/* LP64: MurmurHash3_x64_128, truncated to its low 32 bits on return. */
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__LP64__ */
/* ILP32: Bob Jenkins' lookup3 hash. */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__LP64__ */
77 
78 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
static inline u_int32_t
getblock32(const u_int32_t *__bidi_indexable p, int i)
{
	/* These architectures tolerate unaligned loads; read directly. */
	return *(p + i);
}
84 
static inline u_int64_t
getblock64(const u_int64_t *__bidi_indexable p, int i)
{
	/* These architectures tolerate unaligned loads; read directly. */
	return *(p + i);
}
90 #else /* !__i386__ && !__x86_64__ && !__arm64__*/
static inline u_int32_t
getblock32(const u_int32_t *__bidi_indexable p, int i)
{
	/*
	 * Unaligned-safe 32-bit load in native byte order: use a direct
	 * load when the base pointer is 4-byte aligned, otherwise
	 * assemble the word from individual bytes.
	 */
	const u_int8_t *src = (u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int32_t result;

	if (((uintptr_t)p & 3) != 0) {
#if BYTE_ORDER == BIG_ENDIAN
		result =
		    (((u_int32_t)src[0]) << 24) |
		    (((u_int32_t)src[1]) << 16) |
		    (((u_int32_t)src[2]) << 8) |
		    ((u_int32_t)src[3]);
#else /* LITTLE_ENDIAN */
		result =
		    (((u_int32_t)src[3]) << 24) |
		    (((u_int32_t)src[2]) << 16) |
		    (((u_int32_t)src[1]) << 8) |
		    ((u_int32_t)src[0]);
#endif /* LITTLE_ENDIAN */
	} else {
		result = p[i];
	}
	return result;
}
116 
static inline u_int64_t
getblock64(const u_int64_t *__bidi_indexable p, int i)
{
	/*
	 * Unaligned-safe 64-bit load in native byte order: use a direct
	 * load when the base pointer is 8-byte aligned, otherwise
	 * assemble the word from individual bytes.
	 */
	const u_int8_t *src = (const u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int64_t result;

	if (((uintptr_t)p & 7) != 0) {
#if BYTE_ORDER == BIG_ENDIAN
		result =
		    (((u_int64_t)src[0]) << 56) |
		    (((u_int64_t)src[1]) << 48) |
		    (((u_int64_t)src[2]) << 40) |
		    (((u_int64_t)src[3]) << 32) |
		    (((u_int64_t)src[4]) << 24) |
		    (((u_int64_t)src[5]) << 16) |
		    (((u_int64_t)src[6]) << 8) |
		    ((u_int64_t)src[7]);
#else /* LITTLE_ENDIAN */
		result =
		    (((u_int64_t)src[7]) << 56) |
		    (((u_int64_t)src[6]) << 48) |
		    (((u_int64_t)src[5]) << 40) |
		    (((u_int64_t)src[4]) << 32) |
		    (((u_int64_t)src[3]) << 24) |
		    (((u_int64_t)src[2]) << 16) |
		    (((u_int64_t)src[1]) << 8) |
		    ((u_int64_t)src[0]);
#endif /* LITTLE_ENDIAN */
	} else {
		result = p[i];
	}
	return result;
}
150 #endif /* !__i386__ && !__x86_64 && !__arm64__ */
151 
static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
	/*
	 * MurmurHash3 32-bit finalization mix: alternating xor-shift and
	 * multiply steps to avalanche the remaining state.
	 */
	h = (h ^ (h >> 16)) * 0x85ebca6b;
	h = (h ^ (h >> 13)) * 0xc2b2ae35;
	return h ^ (h >> 16);
}
163 
static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
	/*
	 * MurmurHash3 64-bit finalization mix: alternating xor-shift and
	 * multiply steps to avalanche the remaining state.
	 */
	k = (k ^ (k >> 33)) * 0xff51afd7ed558ccdLLU;
	k = (k ^ (k >> 33)) * 0xc4ceb9fe1a85ec53LLU;
	return k ^ (k >> 33);
}
175 
/*
 * MurmurHash3_x86_32
 */
/* Block-mixing multiplicative constants from the reference implementation. */
#define MH3_X86_32_C1   0xcc9e2d51
#define MH3_X86_32_C2   0x1b873593
181 
182 u_int32_t
net_flowhash_mh3_x86_32(const void * __sized_by (len)key,u_int32_t len,const u_int32_t seed)183 net_flowhash_mh3_x86_32(const void *__sized_by(len) key, u_int32_t len, const u_int32_t seed)
184 {
185 	const u_int8_t *data = (const u_int8_t *)key;
186 	const u_int32_t nblocks = len / 4;
187 	const u_int32_t *blocks;
188 	const u_int8_t *tail;
189 	u_int32_t h1 = seed, k1;
190 	int i;
191 
192 	/* body */
193 	blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);
194 
195 	for (i = -nblocks; i; i++) {
196 		k1 = getblock32(blocks, i);
197 
198 		k1 *= MH3_X86_32_C1;
199 		k1 = ROTL32(k1, 15);
200 		k1 *= MH3_X86_32_C2;
201 
202 		h1 ^= k1;
203 		h1 = ROTL32(h1, 13);
204 		h1 = h1 * 5 + 0xe6546b64;
205 	}
206 
207 	/* tail */
208 	tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
209 	k1 = 0;
210 
211 	switch (len & 3) {
212 	case 3:
213 		k1 ^= tail[2] << 16;
214 		OS_FALLTHROUGH;
215 	case 2:
216 		k1 ^= tail[1] << 8;
217 		OS_FALLTHROUGH;
218 	case 1:
219 		k1 ^= tail[0];
220 		k1 *= MH3_X86_32_C1;
221 		k1 = ROTL32(k1, 15);
222 		k1 *= MH3_X86_32_C2;
223 		h1 ^= k1;
224 	}
225 	;
226 
227 	/* finalization */
228 	h1 ^= len;
229 
230 	h1 = mh3_fmix32(h1);
231 
232 	return h1;
233 }
234 
/*
 * MurmurHash3_x64_128
 */
/* Block-mixing multiplicative constants from the reference implementation. */
#define MH3_X64_128_C1  0x87c37b91114253d5LLU
#define MH3_X64_128_C2  0x4cf5ad432745937fLLU
240 
/*
 * MurmurHash3_x64_128: hash `len' bytes at `key' with the given seed,
 * keeping two 64-bit lanes (h1, h2) and returning only the low 32 bits
 * of the final h1.  The key may be arbitrarily aligned; getblock64()
 * handles unaligned reads where needed.
 *
 * On x86_64 and arm64 the 64-bit rotates are emitted as inline assembly
 * (arm64 has no rol, so a left-rotate by r is encoded as ror by 64-r);
 * other targets use the portable ROTL64() macro.
 */
u_int32_t
net_flowhash_mh3_x64_128(const void *__sized_by(len) key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 16;
	const u_int64_t *blocks;
	const u_int8_t *tail;
	u_int64_t h1 = seed, k1;
	u_int64_t h2 = seed, k2;
	u_int32_t i;

	/* body */
	/* consume every full 16-byte block as two 64-bit lanes */
	blocks = (const u_int64_t *)(const void *)data;

	for (i = 0; i < nblocks; i++) {
		k1 = getblock64(blocks, i * 2 + 0);
		k2 = getblock64(blocks, i * 2 + 1);

		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol   $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;

#if defined(__x86_64__)
		__asm__ ( "rol   $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
		h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
		h1 += h2;
		h1 = h1 * 5 + 0x52dce729;

		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol   $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;

#if defined(__x86_64__)
		__asm__ ( "rol   $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
		h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
		h2 += h1;
		h2 = h2 * 5 + 0x38495ab5;
	}

	/* tail */
	/* fold in the remaining 1-15 bytes; cases fall through downward */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
	k1 = 0;
	k2 = 0;

	switch (len & 15) {
	case 15:
		k2 ^= ((u_int64_t)tail[14]) << 48;
		OS_FALLTHROUGH;
	case 14:
		k2 ^= ((u_int64_t)tail[13]) << 40;
		OS_FALLTHROUGH;
	case 13:
		k2 ^= ((u_int64_t)tail[12]) << 32;
		OS_FALLTHROUGH;
	case 12:
		k2 ^= ((u_int64_t)tail[11]) << 24;
		OS_FALLTHROUGH;
	case 11:
		k2 ^= ((u_int64_t)tail[10]) << 16;
		OS_FALLTHROUGH;
	case 10:
		k2 ^= ((u_int64_t)tail[9]) << 8;
		OS_FALLTHROUGH;
	case 9:
		k2 ^= ((u_int64_t)tail[8]) << 0;
		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol   $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;
		OS_FALLTHROUGH;
	case 8:
		k1 ^= ((u_int64_t)tail[7]) << 56;
		OS_FALLTHROUGH;
	case 7:
		k1 ^= ((u_int64_t)tail[6]) << 48;
		OS_FALLTHROUGH;
	case 6:
		k1 ^= ((u_int64_t)tail[5]) << 40;
		OS_FALLTHROUGH;
	case 5:
		k1 ^= ((u_int64_t)tail[4]) << 32;
		OS_FALLTHROUGH;
	case 4:
		k1 ^= ((u_int64_t)tail[3]) << 24;
		OS_FALLTHROUGH;
	case 3:
		k1 ^= ((u_int64_t)tail[2]) << 16;
		OS_FALLTHROUGH;
	case 2:
		k1 ^= ((u_int64_t)tail[1]) << 8;
		OS_FALLTHROUGH;
	case 1:
		k1 ^= ((u_int64_t)tail[0]) << 0;
		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol   $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror   %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;
	}
	;

	/* finalization */
	/* mix in the length, cross-fold the lanes, avalanche, fold again */
	h1 ^= len;
	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = mh3_fmix64(h1);
	h2 = mh3_fmix64(h2);

	h1 += h2;
	h2 += h1;

	/* throw all but lowest 32-bit */
	return h1 & 0xffffffff;
}
391 
/* Initial internal state for the Jenkins lookup3 hash ("golden ratio"-style arbitrary value). */
#define JHASH_INIT      0xdeadbeef

/* lookup3 mix(): reversibly mixes the three 32-bit lanes a, b, c. */
#define JHASH_MIX(a, b, c) {                    \
	a -= c;  a ^= ROTL32(c, 4);   c += b;   \
	b -= a;  b ^= ROTL32(a, 6);   a += c;   \
	c -= b;  c ^= ROTL32(b, 8);   b += a;   \
	a -= c;  a ^= ROTL32(c, 16);  c += b;   \
	b -= a;  b ^= ROTL32(a, 19);  a += c;   \
	c -= b;  c ^= ROTL32(b, 4);   b += a;   \
}

/* lookup3 final(): final mixing of the three lanes; the result is c. */
#define JHASH_FINAL(a, b, c) {                  \
	c ^= b;  c -= ROTL32(b, 14);            \
	a ^= c;  a -= ROTL32(c, 11);            \
	b ^= a;  b -= ROTL32(a, 25);            \
	c ^= b;  c -= ROTL32(b, 16);            \
	a ^= c;  a -= ROTL32(c, 4);             \
	b ^= a;  b -= ROTL32(a, 14);            \
	c ^= b;  c -= ROTL32(b, 24);            \
}
412 
413 #if BYTE_ORDER == BIG_ENDIAN
414 /*
415  * hashbig()
416  */
/*
 * Jenkins lookup3 hashbig(): 32-bit hash of `len' bytes at `key' for
 * big-endian hosts.  Reads the key as 32-bit words when 4-byte aligned,
 * otherwise falls back to byte-at-a-time accumulation.
 */
u_int32_t
net_flowhash_jhash(const void *__sized_by(len) key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

	if (ALIGNED32(key)) {
		/* read 32-bit chunks */
		u_int32_t k_len = len;
		const u_int32_t *__sized_by(k_len) k = (const u_int32_t *)key;

		/*
		 * all but last block:
		 * aligned reads and affect 32 bits of (a,b,c)
		 */
		while (k_len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			k_len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] << 8" actually reads beyond the end of the string,
		 * but then shifts out the part it's not allowed to read.
		 * Because the string is aligned, the illegal read is in
		 * the same word as the rest of the string.  The masking
		 * trick does make the hash noticably faster for short
		 * strings (like English words).
		 */
		/* On big-endian the masks below keep the high-order bytes. */
		switch (k_len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;

		case 11:
			c += k[2] & 0xffffff00;
			b += k[1];
			a += k[0];
			break;

		case 10:
			c += k[2] & 0xffff0000;
			b += k[1];
			a += k[0];
			break;

		case 9:
			c += k[2] & 0xff000000;
			b += k[1];
			a += k[0];
			break;

		case 8:
			b += k[1];
			a += k[0];
			break;

		case 7:
			b += k[1] & 0xffffff00;
			a += k[0];
			break;

		case 6:
			b += k[1] & 0xffff0000;
			a += k[0];
			break;

		case 5:
			b += k[1] & 0xff000000;
			a += k[0];
			break;

		case 4:
			a += k[0];
			break;

		case 3:
			a += k[0] & 0xffffff00;
			break;

		case 2:
			a += k[0] & 0xffff0000;
			break;

		case 1:
			a += k[0] & 0xff000000;
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}

	/* need to read the key one byte at a time */
	u_int32_t k_len = len;
	const u_int8_t *__sized_by(k_len) k = (const u_int8_t *)key;

	/* all but the last block: affect some 32 bits of (a,b,c) */
	while (k_len > 12) {
		a += ((u_int32_t)k[0]) << 24;
		a += ((u_int32_t)k[1]) << 16;
		a += ((u_int32_t)k[2]) << 8;
		a += ((u_int32_t)k[3]);
		b += ((u_int32_t)k[4]) << 24;
		b += ((u_int32_t)k[5]) << 16;
		b += ((u_int32_t)k[6]) << 8;
		b += ((u_int32_t)k[7]);
		c += ((u_int32_t)k[8]) << 24;
		c += ((u_int32_t)k[9]) << 16;
		c += ((u_int32_t)k[10]) << 8;
		c += ((u_int32_t)k[11]);
		JHASH_MIX(a, b, c);
		k_len -= 12;
		k += 12;
	}

	/* last block: affect all 32 bits of (c) */
	switch (k_len) {
	case 12:
		c += k[11];
		OS_FALLTHROUGH;
	case 11:
		c += ((u_int32_t)k[10]) << 8;
		OS_FALLTHROUGH;
	case 10:
		c += ((u_int32_t)k[9]) << 16;
		OS_FALLTHROUGH;
	case 9:
		c += ((u_int32_t)k[8]) << 24;
		OS_FALLTHROUGH;
	case 8:
		b += k[7];
		OS_FALLTHROUGH;
	case 7:
		b += ((u_int32_t)k[6]) << 8;
		OS_FALLTHROUGH;
	case 6:
		b += ((u_int32_t)k[5]) << 16;
		OS_FALLTHROUGH;
	case 5:
		b += ((u_int32_t)k[4]) << 24;
		OS_FALLTHROUGH;
	case 4:
		a += k[3];
		OS_FALLTHROUGH;
	case 3:
		a += ((u_int32_t)k[2]) << 8;
		OS_FALLTHROUGH;
	case 2:
		a += ((u_int32_t)k[1]) << 16;
		OS_FALLTHROUGH;
	case 1:
		a += ((u_int32_t)k[0]) << 24;
		break;

	case 0:
		/* zero length requires no mixing */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
}
595 #else /* LITTLE_ENDIAN */
596 /*
597  * hashlittle()
598  */
/*
 * Jenkins lookup3 hashlittle(): 32-bit hash of `len' bytes at `key' for
 * little-endian hosts.  Picks a 32-bit, 16-bit or byte-at-a-time read
 * path based on the key's alignment (and, on i386/x86_64, on what is
 * measured to be faster there).
 *
 * NOTE: the braces in this function only balance within a single
 * preprocessor configuration -- the `else if'/closing-brace pair for the
 * 16-bit path exists only when !__i386__ && !__x86_64__.  Edit with care.
 */
u_int32_t
net_flowhash_jhash(const void *__sized_by(len) key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * On i386/x86_64, it is faster to read 32-bit chunks if the key
	 * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
	 * is aligned 16-bit.
	 */
	if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
	if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* read 32-bit chunks */
		u_int32_t k_len = len;
		const u_int32_t *__sized_by(k_len) k = (const u_int32_t *)key;
		const u_int16_t *k16 = (const u_int16_t *)key;
		const u_int8_t *k8 = (const u_int8_t *)key;

		/*
		 * all but last block:
		 * aligned reads and affect 32 bits of (a,b,c)
		 */
		while (k_len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			k_len -= 12;
			k += 3;
		}

		/* handle the last (probably partial) block */
		/* trailing bytes are picked up via the k16/k8 views */
		switch (k_len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;

		case 11:
			c += ((u_int32_t)k8[10]) << 16;
			c += k16[4];
			b += k[1];
			a += k[0];
			break;

		case 10:
			c += k16[4];
			b += k[1];
			a += k[0];
			break;

		case 9:
			c += k8[8];
			b += k[1];
			a += k[0];
			break;

		case 8:
			b += k[1];
			a += k[0];
			break;

		case 7:
			b += ((u_int32_t)k8[6]) << 16;
			b += k16[2];
			a += k[0];
			break;

		case 6:
			b += k16[2];
			a += k[0];
			break;

		case 5:
			b += k8[4];
			a += k[0];
			break;

		case 4:
			a += k[0];
			break;

		case 3:
			a += ((u_int32_t)k8[2]) << 16;
			a += k16[0];
			break;

		case 2:
			a += k16[0];
			break;

		case 1:
			a += k8[0];
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}
#if !defined(__i386__) && !defined(__x86_64__)
	else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
	/* read 16-bit chunks */
	u_int32_t k_len = len;
	const u_int16_t *__sized_by(k_len) k = (const u_int16_t *)key;
	const u_int8_t *k8;

	/* all but last block: aligned reads and different mixing */
	while (k_len > 12) {
		a += k[0] + (((u_int32_t)k[1]) << 16);
		b += k[2] + (((u_int32_t)k[3]) << 16);
		c += k[4] + (((u_int32_t)k[5]) << 16);
		JHASH_MIX(a, b, c);
		k_len -= 12;
		k += 6;
	}

	/* handle the last (probably partial) block */
	k8 = (const u_int8_t *)k;
	switch (k_len) {
	case 12:
		c += k[4] + (((u_int32_t)k[5]) << 16);
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 11:
		c += ((u_int32_t)k8[10]) << 16;
		OS_FALLTHROUGH;
	case 10:
		c += k[4];
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 9:
		c += k8[8];
		OS_FALLTHROUGH;
	case 8:
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 7:
		b += ((u_int32_t)k8[6]) << 16;
		OS_FALLTHROUGH;
	case 6:
		b += k[2];
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 5:
		b += k8[4];
		OS_FALLTHROUGH;
	case 4:
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;

	case 3:
		a += ((u_int32_t)k8[2]) << 16;
		OS_FALLTHROUGH;
	case 2:
		a += k[0];
		break;

	case 1:
		a += k8[0];
		break;

	case 0:
		/* zero length requires no mixing */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
#if !defined(__i386__) && !defined(__x86_64__)
}

/* need to read the key one byte at a time */
u_int32_t k_len = len;
const u_int8_t *__sized_by(k_len) k = (const u_int8_t *)key;

/* all but the last block: affect some 32 bits of (a,b,c) */
while (k_len > 12) {
	a += k[0];
	a += ((u_int32_t)k[1]) << 8;
	a += ((u_int32_t)k[2]) << 16;
	a += ((u_int32_t)k[3]) << 24;
	b += k[4];
	b += ((u_int32_t)k[5]) << 8;
	b += ((u_int32_t)k[6]) << 16;
	b += ((u_int32_t)k[7]) << 24;
	c += k[8];
	c += ((u_int32_t)k[9]) << 8;
	c += ((u_int32_t)k[10]) << 16;
	c += ((u_int32_t)k[11]) << 24;
	JHASH_MIX(a, b, c);
	k_len -= 12;
	k += 12;
}

/* last block: affect all 32 bits of (c) */
switch (k_len) {
case 12:
	c += ((u_int32_t)k[11]) << 24;
	OS_FALLTHROUGH;
case 11:
	c += ((u_int32_t)k[10]) << 16;
	OS_FALLTHROUGH;
case 10:
	c += ((u_int32_t)k[9]) << 8;
	OS_FALLTHROUGH;
case 9:
	c += k[8];
	OS_FALLTHROUGH;
case 8:
	b += ((u_int32_t)k[7]) << 24;
	OS_FALLTHROUGH;
case 7:
	b += ((u_int32_t)k[6]) << 16;
	OS_FALLTHROUGH;
case 6:
	b += ((u_int32_t)k[5]) << 8;
	OS_FALLTHROUGH;
case 5:
	b += k[4];
	OS_FALLTHROUGH;
case 4:
	a += ((u_int32_t)k[3]) << 24;
	OS_FALLTHROUGH;
case 3:
	a += ((u_int32_t)k[2]) << 16;
	OS_FALLTHROUGH;
case 2:
	a += ((u_int32_t)k[1]) << 8;
	OS_FALLTHROUGH;
case 1:
	a += k[0];
	break;

case 0:
	/* zero length requires no mixing */
	return c;
}

JHASH_FINAL(a, b, c);

return c;
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
863 #endif /* LITTLE_ENDIAN */
864