xref: /xnu-8796.101.5/bsd/skywalk/lib/cuckoo_hashtable.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <skywalk/os_skywalk_private.h>
29 
30 #include "cuckoo_hashtable.h"
31 
32 #define CUCKOO_TAG "com.apple.skywalk.libcuckoo"
33 SKMEM_TAG_DEFINE(cuckoo_tag, CUCKOO_TAG);
34 
35 
36 SYSCTL_NODE(_kern_skywalk, OID_AUTO, libcuckoo, CTLFLAG_RW | CTLFLAG_LOCKED,
37     0, "Skywalk Cuckoo Hashtable Library");
38 
39 uint32_t cuckoo_verbose = 0;
40 #if (DEVELOPMENT || DEBUG)
41 SYSCTL_UINT(_kern_skywalk_libcuckoo, OID_AUTO, verbose,
42     CTLFLAG_RW | CTLFLAG_LOCKED, &cuckoo_verbose, 0, "");
43 #endif /* DEVELOPMENT || DEBUG */
44 
/* Verbosity levels understood by cht_log(); higher value == chattier. */
typedef enum cht_verb {
	CHTV_ERR = 0,
	CHTV_WARN = 1,
	CHTV_INFO = 2,
	CHTV_DEBUG = 3,
} cht_verb_t;
51 
52 static LCK_GRP_DECLARE(cht_lock_group, "CHT_LOCK");
53 static LCK_ATTR_DECLARE(cht_lock_attr, 0, 0);
54 
55 #if SK_LOG
56 #define cht_log(level, _fmt, ...)       \
57 	do {    \
58 	        if (level <= cuckoo_verbose) {  \
59 	                kprintf("Cuckoo: thread %p %-30s " _fmt "\n",   \
60 	                    current_thread(), __FUNCTION__, ##__VA_ARGS__);     \
61 	        }       \
62 	} while (0);
63 #else  /* !SK_LOG */
64 #define cht_log(_flag, _fmt, ...) do { ((void)0); } while (0)
65 #endif /* !SK_LOG */
66 
67 #define cht_err(_fmt, ...) cht_log(CHTV_ERR, _fmt, ##__VA_ARGS__)
68 #define cht_warn(_fmt, ...) cht_log(CHTV_WARN, _fmt, ##__VA_ARGS__)
69 #define cht_info(_fmt, ...) cht_log(CHTV_INFO, _fmt, ##__VA_ARGS__)
70 #define cht_debug(_fmt, ...) cht_log(CHTV_DEBUG, _fmt, ##__VA_ARGS__)
71 
72 static inline int
cuckoo_node_chain(struct cuckoo_node * node,struct cuckoo_node * new_node)73 cuckoo_node_chain(struct cuckoo_node *node,
74     struct cuckoo_node *new_node)
75 {
76 	struct cuckoo_node *prev_node = node;
77 
78 	/* new node must be zero initialized */
79 	ASSERT(new_node->next == NULL);
80 
81 	/* use tail insert to check for duplicate along list */
82 	while (__improbable(node != NULL)) {
83 		if (node == new_node) {
84 			return EEXIST;
85 		}
86 		prev_node = node;
87 		node = node->next;
88 	}
89 
90 	prev_node->next = new_node;
91 
92 	return 0;
93 }
94 
95 static inline bool
cuckoo_node_del(struct cuckoo_node ** pnode,struct cuckoo_node * del_node)96 cuckoo_node_del(struct cuckoo_node **pnode,
97     struct cuckoo_node *del_node)
98 {
99 	ASSERT(pnode != NULL);
100 
101 	struct cuckoo_node *node = *pnode;
102 	while (node != NULL && node != del_node) {
103 		pnode = &node->next;
104 		node = node->next;
105 	}
106 	if (__probable(node != NULL)) {
107 		*pnode = node->next;
108 		node->next = NULL;
109 		return true;
110 	}
111 
112 	return false;
113 }
114 
/* Link next_node directly after node; caller provides serialization. */
static inline void
cuckoo_node_set_next(struct cuckoo_node *node, struct cuckoo_node *next_node)
{
	node->next = next_node;
}
120 
/* We probably won't add RCU soon so use simple pointer reference for now */
/* Return the node chained after @node (same full hash collision list). */
static inline struct cuckoo_node *
cuckoo_node_next(struct cuckoo_node *node)
{
	return node->next;
}
127 
128 #define _CHT_MAX_LOAD_SHRINK 40       /* at least below 40% load to shrink */
129 #define _CHT_MIN_LOAD_EXPAND 85       /* cuckoo could hold 85% full table */
130 
131 enum cuckoo_resize_ops {
132 	_CHT_RESIZE_EXPAND = 0,
133 	_CHT_RESIZE_SHRINK = 1,
134 };
135 
136 /*
137  * Following classic Cuckoo hash table design, cuckoo_hashtable use k hash
138  * functions to derive multiple candidate hash table bucket indexes.
139  * Here cuckoo_hashtable use k=2.
140  *     prim_bkt_idx = bkt_idx[1] = hash[1](key) % N_BUCKETS
141  *     alt_bkt_idx  = bkt_idx[2] = hash[2](key) % N_BUCKETS
142  *
143  * Currently, we let the caller pass in the actual key's hash value, because
144  * in most of the use cases, caller probably have already calculated the hash
145  * value of actual key (e.g. using hardware offloading or copy+hash). This also
146  * save us from storing the key in the table (or any side data structure). So
147  *
148  *     hash[1] = hash    // hash(hash value) passed in from caller
149  *     hash[2] = __alt_hash(hash[1])
150  *
151  * __alt_hash derives h2 using h1's high bits, since calculating primary
152  * bucket index uses its low bits. So alt_hash is still a uniformly distributed
153  * random variable (but not independent of h1, but is fine for hashtable usage).
154  *
 * There is an option to store h2 in the table bucket as well, but
 * cuckoo_hashtable does not do this, trading a few more cpu cycles during
 * add/del operations for lower memory usage. Assuming that the hashtable is
158  * read-heavy rather than write-heavy, this is reasonable.
159  *
160  * In the rare case of full hash value collision, where
161  *     hash[1] == hash[1]'
162  * , there is no way for the hash table to differentiate two objects, thus we
163  * need to chain the fully collided objects under the same bucket slot.
164  * The caller need to walk the chain to explicitly compare the full length key
165  * to find the correct object.
166  *
167  * Reference Counting
168  * The hashtable assumes all objects are reference counted. It takes function
169  * pointers that retain and release the object.
170  * Adding to the table will call its retain function.
171  * Deleting from the table will call its release function.
172  *
173  */
174 
/* hash might be zero, so always use _node == NULL to test empty slot */
struct _slot {
	uint32_t                _hash;  /* caller-supplied hash value */
	struct cuckoo_node      *_node; /* head of full-collision chain */
};
180 
/*
 * Cuckoo hashtable cache line awareness:
 *   - ARM platform has 128B CPU cache line.
 *   - Intel platform has 64B CPU cache line. However, hardware prefetcher
 *     treats cache lines as 128B chunk and prefetch the other 64B cache line.
 *
 * Thus cuckoo_hashtable use 128B as bucket size to make best use CPU cache
 * resource.
 */
#define _CHT_CACHELINE_CHUNK 128
#define _CHT_SLOT_INVAL UINT8_MAX
/* as many slots as fit in one 128B chunk after the lock and _inuse byte */
static const uint8_t _CHT_BUCKET_SLOTS =
    ((_CHT_CACHELINE_CHUNK - sizeof(lck_mtx_t) - sizeof(uint8_t)) /
    sizeof(struct _slot));

struct _bucket {
	struct _slot            _slots[_CHT_BUCKET_SLOTS];
	decl_lck_mtx_data(, _lock);     /* serializes access to this bucket */
	uint8_t                 _inuse; /* number of occupied slots */
} __attribute__((aligned(_CHT_CACHELINE_CHUNK)));
201 
/*
 * Table header.  Resize runs under the write side of _resize_lock;
 * find/add/del run under the read side plus per-bucket mutexes.
 * _lock serializes _busy/_resize_waiters for the resize handshake.
 */
struct cuckoo_hashtable {
	uint32_t        _bitmask;       /* 1s' mask for quick MOD */
	uint32_t        _n_buckets;     /* number of buckets */

	volatile uint32_t _n_entries;   /* number of entries in table */
	uint32_t          _capacity;    /* max number of entries */
	uint32_t          _rcapacity;   /* requested capacity */

	bool            _busy;          /* a resize is in progress */
	uint32_t        _resize_waiters;
	decl_lck_rw_data(, _resize_lock);
	decl_lck_mtx_data(, _lock);

	struct _bucket  *_buckets;

	/* caller-supplied object lifecycle/compare callbacks */
	int (*_obj_cmp)(struct cuckoo_node *node, void *key);
	void (*_obj_retain)(struct cuckoo_node *);
	void (*_obj_release)(struct cuckoo_node *);
} __attribute__((aligned(_CHT_CACHELINE_CHUNK)));

static_assert(sizeof(struct _bucket) <= _CHT_CACHELINE_CHUNK);
223 
/* Store (hash, node) into a slot; bucket lock held by caller. */
static inline void
__slot_set(struct _slot *slt, uint32_t hash, struct cuckoo_node *node)
{
	slt->_hash = hash;
	slt->_node = node;
}
230 
/* Clear a slot back to the empty state (_node == NULL marks empty). */
static inline void
__slot_reset(struct _slot *slt)
{
	slt->_hash = 0;
	slt->_node = NULL;
}
237 
/*
 * Derive the secondary hash from the primary one.  The primary bucket
 * index consumes the low bits, so the alternate hash is built from the
 * high 16 bits scrambled with the Murmur mix constant, keeping it well
 * distributed (though not independent of the primary hash).
 */
static inline uint32_t
__alt_hash(uint32_t hash)
{
#define _CHT_ALT_HASH_MIX       0x5bd1e995      /* Murmur hash mix */
	uint32_t high_bits = hash >> 16;

	return hash ^ ((high_bits + 1) * _CHT_ALT_HASH_MIX);
}
246 
/* Return the bucket at index b_i (no bounds check; index pre-masked). */
static inline struct _bucket *
__get_bucket(struct cuckoo_hashtable *h, uint32_t b_i)
{
	return &h->_buckets[b_i];
}
252 
/* Primary candidate bucket: low bits of the caller-supplied hash. */
static inline struct _bucket *
__prim_bucket(struct cuckoo_hashtable *h, uint32_t hash)
{
	return __get_bucket(h, hash & h->_bitmask);
}
258 
/* Alternate candidate bucket: low bits of the derived secondary hash. */
static inline struct _bucket *
__alt_bucket(struct cuckoo_hashtable *h, uint32_t hash)
{
	return __get_bucket(h, __alt_hash(hash) & h->_bitmask);
}
264 
#if SK_LOG
/* Index of bucket b within h->_buckets; used only for log messages. */
static inline size_t
__bucket_idx(struct cuckoo_hashtable *h, struct _bucket *b)
{
	return ((uintptr_t)b - (uintptr_t)&h->_buckets[0]) / sizeof(struct _bucket);
}
#endif /* SK_LOG */
272 
/* Return slot slot_idx of bucket b (no bounds check). */
static inline struct _slot *
__bucket_slot(struct _bucket *b, uint32_t slot_idx)
{
	return &b->_slots[slot_idx];
}
278 
/* A slot is empty iff its node pointer is NULL (hash 0 is a valid hash). */
static inline bool
__slot_empty(struct _slot *s)
{
	return s->_node == NULL;
}
284 
/*
 * Round v up to the nearest power of two (powers of two map to
 * themselves).  Works by smearing the top set bit of (v - 1) into all
 * lower positions, then adding one.  Note: v == 0 yields 0.
 */
static inline uint32_t
__align32pow2(uint32_t v)
{
	v -= 1;
	for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
		v |= v >> shift;
	}
	return v + 1;
}
298 
/* Current occupancy of the table as an integer percentage (0-100). */
uint32_t
cuckoo_hashtable_load_factor(struct cuckoo_hashtable *h)
{
	return (100 * h->_n_entries) / (h->_n_buckets * _CHT_BUCKET_SLOTS);
}
304 
305 /*
306  * Cuckoo hashtable uses regular mutex.  Most operations(find/add) should
307  * finish faster than a context switch.  It avoids using the spin lock since
308  * it might cause issues on certain platforms (e.g. x86_64) where the trap
309  * handler for dealing with FP/SIMD use would be invoked to perform thread-
310  * specific allocations; the use of FP/SIMD here is related to the memory
311  * compare with mask routines.  Even in case of another thread holding a
312  * bucket lock and went asleep, cuckoo path search would try to find another
313  * path without blockers.
314  *
315  * The only exception is table expansion, which could take a long time, we use
316  * read/write lock to protect the whole table against any read/write in that
317  * case.
318  */
319 
320 /* find/add only acquires table rlock, and serialize with bucket lock */
321 #define __lock_bucket(b)        lck_mtx_lock(&b->_lock)
322 #define __unlock_bucket(b)      lck_mtx_unlock(&b->_lock)
323 
324 #define _CHT_DEADLOCK_THRESHOLD 20
325 static inline bool
__lock_bucket_with_backoff(struct _bucket * b)326 __lock_bucket_with_backoff(struct _bucket *b)
327 {
328 	uint32_t try_counter = 0;
329 	while (!lck_mtx_try_lock(&b->_lock)) {
330 		if (try_counter++ > _CHT_DEADLOCK_THRESHOLD) {
331 			return false;
332 		}
333 	}
334 	return true;
335 }
336 
337 #define __rlock_table(h)        lck_rw_lock_shared(&h->_resize_lock)
338 #define __unrlock_table(h)      lck_rw_unlock_shared(&h->_resize_lock)
339 #define __r2wlock_table(h)      lck_rw_lock_shared_to_exclusive(&h->_resize_lock)
340 #define __wlock_table(h)        lck_rw_lock_exclusive(&h->_resize_lock)
341 #define __unwlock_table(h)      lck_rw_unlock_exclusive(&h->_resize_lock)
342 
/*
 * Serialize resizers: only one thread may resize at a time.
 * Returns 0 with the table write-locked (caller must call __resize_end),
 * EAGAIN if a concurrent resizer finished while we waited, or EINTR if
 * the sleep was interrupted.  In the latter two cases no locks are held.
 */
static inline int
__resize_begin(struct cuckoo_hashtable *h)
{
	// takes care of concurrent resize
	lck_mtx_lock(&h->_lock);
	while (h->_busy) {
		/* bump waiter count, skipping 0 on counter wraparound */
		if (++(h->_resize_waiters) == 0) {   /* wraparound */
			h->_resize_waiters++;
		}
		int error = msleep(&h->_resize_waiters, &h->_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		if (error == EINTR) {
			cht_warn("resize waiter was interrupted");
			ASSERT(h->_resize_waiters > 0);
			h->_resize_waiters--;
			lck_mtx_unlock(&h->_lock);
			return EINTR;
		}
		// resizer finished
		lck_mtx_unlock(&h->_lock);
		return EAGAIN;
	}

	h->_busy = true;
	lck_mtx_unlock(&h->_lock);

	// takes other readers offline
	__wlock_table(h);
	return 0;
}
373 
/*
 * End a resize started by __resize_begin(): drop the table write lock,
 * clear the busy flag and wake any threads parked in __resize_begin().
 */
static inline void
__resize_end(struct cuckoo_hashtable *h)
{
	__unwlock_table(h);
	lck_mtx_lock(&h->_lock);
	h->_busy = false;
	if (__improbable(h->_resize_waiters > 0)) {
		/* waiters re-check state themselves; zero the count here */
		h->_resize_waiters = 0;
		wakeup(&h->_resize_waiters);
	}
	lck_mtx_unlock(&h->_lock);
}
386 
/*
 * Allocate and initialize a hashtable sized for p->cht_capacity entries.
 * The bucket count is rounded to a power of two so bucket selection can
 * mask instead of divide.  Returns NULL if the requested capacity is out
 * of range or the bucket array cannot be allocated.  All three object
 * callbacks in p are mandatory.
 */
struct cuckoo_hashtable *
cuckoo_hashtable_create(struct cuckoo_hashtable_params *p)
{
	struct cuckoo_hashtable *h = NULL;
	uint32_t n = 0;
	uint32_t n_buckets = 0;
	struct _bucket *buckets = NULL;
	uint32_t i;

	if (p->cht_capacity > CUCKOO_HASHTABLE_ENTRIES_MAX ||
	    p->cht_capacity < _CHT_BUCKET_SLOTS) {
		return NULL;
	}

	ASSERT(p->cht_capacity < UINT32_MAX);
	n = (uint32_t)p->cht_capacity;
	/* Z_NOFAIL: the table header allocation cannot fail */
	h = sk_alloc_type(struct cuckoo_hashtable, Z_WAITOK | Z_NOFAIL, cuckoo_tag);

	/* NOTE(review): n / _CHT_BUCKET_SLOTS truncates before rounding up,
	 * so the resulting capacity may be below the request for capacities
	 * that are not multiples of the slot count — confirm intent. */
	n_buckets = __align32pow2(n / _CHT_BUCKET_SLOTS);
	buckets = sk_alloc_type_array(struct _bucket, n_buckets, Z_WAITOK, cuckoo_tag);
	if (buckets == NULL) {
		sk_free_type(struct cuckoo_hashtable, h);
		return NULL;
	}

	for (i = 0; i < n_buckets; i++) {
		lck_mtx_init(&buckets[i]._lock, &cht_lock_group, &cht_lock_attr);
	}

	lck_mtx_init(&h->_lock, &cht_lock_group, &cht_lock_attr);

	h->_n_entries = 0;
	h->_n_buckets = n_buckets;
	h->_capacity = h->_rcapacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	h->_bitmask = n_buckets - 1;    /* power-of-two count -> all-ones mask */
	h->_buckets = buckets;
	lck_rw_init(&h->_resize_lock, &cht_lock_group, &cht_lock_attr);
	h->_busy = false;
	h->_resize_waiters = 0;

	ASSERT(p->cht_obj_retain != NULL);
	ASSERT(p->cht_obj_release != NULL);
	ASSERT(p->cht_obj_cmp != NULL);
	h->_obj_cmp = p->cht_obj_cmp;
	h->_obj_retain = p->cht_obj_retain;
	h->_obj_release = p->cht_obj_release;

	return h;
}
436 
437 void
cuckoo_hashtable_free(struct cuckoo_hashtable * h)438 cuckoo_hashtable_free(struct cuckoo_hashtable *h)
439 {
440 	uint32_t i;
441 
442 	if (h == NULL) {
443 		return;
444 	}
445 
446 	ASSERT(h->_n_entries == 0);
447 
448 	if (h->_buckets != NULL) {
449 		for (i = 0; i < h->_n_buckets; i++) {
450 			lck_mtx_destroy(&h->_buckets[i]._lock, &cht_lock_group);
451 		}
452 		sk_free_type_array(struct _bucket, h->_n_buckets, h->_buckets);
453 	}
454 	sk_free_type(struct cuckoo_hashtable, h);
455 }
456 
/* Number of objects currently stored in the table. */
size_t
cuckoo_hashtable_entries(struct cuckoo_hashtable *h)
{
	return h->_n_entries;
}
462 
/* Total slot capacity at the current bucket count. */
size_t
cuckoo_hashtable_capacity(struct cuckoo_hashtable *h)
{
	return h->_n_buckets * _CHT_BUCKET_SLOTS;
}
468 
/* Bytes used by the table header plus the bucket array (not the objects). */
size_t
cuckoo_hashtable_memory_footprint(struct cuckoo_hashtable *h)
{
	size_t total_meminuse = sizeof(struct cuckoo_hashtable) +
	    (h->_n_buckets * sizeof(struct _bucket));
	return total_meminuse;
}
476 
/*
 * Search one bucket for an object matching (key, hash).  A slot's hash
 * match is confirmed by walking its full-collision chain with the
 * caller-supplied compare.  On a hit the object is retained while the
 * bucket lock is still held; returns NULL when not found.
 */
static inline struct cuckoo_node *
__find_in_bucket(struct cuckoo_hashtable *h, struct _bucket *b, void *key,
    uint32_t hash)
{
	uint32_t i;
	struct cuckoo_node *node = NULL;

	__lock_bucket(b);
	if (b->_inuse == 0) {
		goto done;      /* bucket empty, nothing to scan */
	}
	for (i = 0; i < _CHT_BUCKET_SLOTS; i++) {
		if (b->_slots[i]._hash == hash) {
			node = b->_slots[i]._node;
			/* walk the full-hash-collision chain */
			while (node != NULL) {
				if (h->_obj_cmp(node, key) == 0) {
					h->_obj_retain(node);
					goto done;
				}
				node = cuckoo_node_next(node);
			}
		}
	}

done:
	__unlock_bucket(b);
	return node;
}
505 
/* will return node retained */
/*
 * Look up the object matching (key, hash), checking the primary bucket
 * first and then the alternate.  Runs under the table read lock so it
 * can proceed concurrently with other finds/adds but not with a resize.
 * Returns the node retained, or NULL if absent.
 */
struct cuckoo_node *
cuckoo_hashtable_find_with_hash(struct cuckoo_hashtable *h, void *key,
    uint32_t hash)
{
	struct _bucket *b1, *b2;
	struct cuckoo_node *node = NULL;

	__rlock_table(h);

	b1 = __prim_bucket(h, hash);
	if ((node = __find_in_bucket(h, b1, key, hash)) != NULL) {
		goto done;
	}

	b2 = __alt_bucket(h, hash);
	if ((node = __find_in_bucket(h, b2, key, hash)) != NULL) {
		goto done;
	}

done:
	__unrlock_table(h);
	return node;
}
530 
531 /*
532  * To add a key into cuckoo_hashtable:
533  *   1. First it searches the key's two candidate buckets b1, b2
534  *   2. If there are slots available in b1 or b2, we place the key there
535  *   3. Otherwise cuckoo_hashtable will have to probe and make space
536  *
 * To move keys around (open addressing hash table), cuckoo_hashtable needs to
 * first find an available slot via cuckoo search. Here it uses breadth-first
 * search to find the shortest path towards an empty bucket slot.
540  *
541  */
/*
 * Try to place (node, hash) directly into bucket b.  Three outcomes:
 *   0       - inserted into a free slot, or chained onto an existing
 *             slot whose hash fully collides (node retained either way);
 *   EEXIST  - node already present on the collision chain;
 *   -1      - bucket full and no hash collision slot; caller must probe.
 */
static inline int
__add_to_bucket(struct cuckoo_hashtable *h, struct _bucket *b,
    struct cuckoo_node *node, uint32_t hash)
{
	int ret = -1;
	uint8_t avail_i = _CHT_SLOT_INVAL;

	__lock_bucket(b);
	if (b->_inuse == _CHT_BUCKET_SLOTS) {
		goto done;
	}
	for (uint8_t i = 0; i < _CHT_BUCKET_SLOTS; i++) {
		struct _slot *s = __bucket_slot(b, i);
		if (__slot_empty(s)) {
			/* remember the first free slot */
			if (avail_i == _CHT_SLOT_INVAL) {
				avail_i = i;
			}
		} else {
			/* chain to existing slot with same hash */
			if (__improbable(s->_hash == hash)) {
				ASSERT(s->_node != NULL);
				ret = cuckoo_node_chain(s->_node, node);
				if (ret != 0) {
					goto done;
				}
				cht_debug("hash %x node %p inserted [%zu][%d]",
				    hash, node, __bucket_idx(h, b), i);
				OSAddAtomic(1, &h->_n_entries);
				h->_obj_retain(node);
				goto done;
			}
		}
	}
	if (avail_i != _CHT_SLOT_INVAL) {
		h->_obj_retain(node);
		b->_slots[avail_i]._hash = hash;
		b->_slots[avail_i]._node = node;
		b->_inuse++;
		cht_debug("hash %x node %p inserted [%zu][%d]", hash, node,
		    __bucket_idx(h, b), avail_i);
		OSAddAtomic(1, &h->_n_entries);
		ret = 0;
	}
done:
	__unlock_bucket(b);
	return ret;
}
589 
#define _CHT_BFS_QUEUE_LEN      UINT8_MAX
/* stop expanding before a full bucket's children would overflow the queue */
#define _CHT_BFS_QUEUE_END      (_CHT_BFS_QUEUE_LEN - _CHT_BUCKET_SLOTS)

/* One BFS tree node: a bucket plus the edge (parent, slot) we came from. */
struct _bfs_node {
	uint32_t        bkt_idx;        /* bucket this node represents */
	uint8_t         prev_node_idx;  /* parent index in queue; QUEUE_LEN = root */
	uint8_t         prev_slot_idx;  /* parent slot whose alt bucket is this */
};
598 
599 /*
600  * Move slots backwards on cuckoo path
601  *
602  * cuckoo_move would hold at most 2 locks at any time, moving from
603  * the end of cuckoo path toward the bucket where new keys should be
604  * stored. There could be chances of dead lock in case of multiple
605  * writers have overlapping cuckoo path. We could arrange the order of
606  * locking to avoid that but then we have to take all locks upfront,
607  * which is not friendly to concurrent readers. So instead, we try to
608  * take one by one(but still at most 2 locks holding at any time),
609  * with backoff in mind.
610  */
/*
 * Replay a BFS-discovered cuckoo path from its leaf (the empty slot)
 * back to the root bucket, shifting one occupant backward per hop so the
 * root gains a free slot for the new entry.  Holds at most two bucket
 * locks at a time and re-validates the path at every hop, since the BFS
 * ran without locks and other writers may have raced.  Returns 0 on
 * success (node retained, entry count bumped) or EINVAL when the path
 * was invalidated or a likely deadlock was detected.
 */
static int
cuckoo_move(struct cuckoo_hashtable *h, struct cuckoo_node *node,
    uint32_t hash, struct _bfs_node *queue, uint8_t leaf_node_idx,
    uint8_t leaf_slot)
{
	struct _bfs_node *prev_node, *curr_node;
	struct _bucket *from_bkt, *to_bkt, *alt_bkt;
	uint8_t from_slot, to_slot;

	curr_node = &queue[leaf_node_idx];
	to_bkt = __get_bucket(h, curr_node->bkt_idx);
	to_slot = leaf_slot;

	__lock_bucket(to_bkt);

	/* walk edges toward the root (prev_node_idx == QUEUE_LEN) */
	while (__probable(curr_node->prev_node_idx != _CHT_BFS_QUEUE_LEN)) {
		prev_node = &queue[curr_node->prev_node_idx];
		from_bkt = __get_bucket(h, prev_node->bkt_idx);
		from_slot = curr_node->prev_slot_idx;

		if (!__lock_bucket_with_backoff(from_bkt)) {
			/* a dead lock or a sleeping-thread holding the lock */
			__unlock_bucket(to_bkt);
			cht_warn("cuckoo move deadlock detected");
			return EINVAL;
		}

		/*
		 * Verify cuckoo path by checking:
		 * 1. from_bkt[from_slot]'s alternative bucket is still to_bkt
		 * 2. to_bkt[to_slot] is still vacant
		 */
		alt_bkt = __alt_bucket(h, from_bkt->_slots[from_slot]._hash);
		if (alt_bkt != to_bkt ||
		    !__slot_empty(__bucket_slot(to_bkt, to_slot))) {
			__unlock_bucket(from_bkt);
			__unlock_bucket(to_bkt);
			cht_warn("cuckoo move path invalid: %s %s",
			    alt_bkt != to_bkt ? "alt_bkt != to_bkt" : "",
			    !__slot_empty(__bucket_slot(to_bkt, to_slot)) ?
			    "!slot_empty(to_bkt, to_slot)" : "");
			return EINVAL;
		}

		cht_log(CHTV_DEBUG, "Move [0x%lx][%d] to [0x%lx][%d]",
		    from_bkt - h->_buckets, from_slot, to_bkt - h->_buckets,
		    to_slot);

		ASSERT(to_bkt->_slots[to_slot]._node == NULL);
		ASSERT(to_bkt->_slots[to_slot]._hash == 0);

		/* move entry backward */
		to_bkt->_slots[to_slot] = from_bkt->_slots[from_slot];
		to_bkt->_inuse++;
		__slot_reset(&from_bkt->_slots[from_slot]);
		from_bkt->_inuse--;

		__unlock_bucket(to_bkt);

		/* the vacated slot becomes the next hop's destination */
		curr_node = prev_node;
		to_bkt = from_bkt;
		to_slot = from_slot;
	}

	ASSERT(curr_node->prev_node_idx == _CHT_BFS_QUEUE_LEN);
	ASSERT(curr_node->prev_slot_idx == _CHT_SLOT_INVAL);

	/* if root slot is no longer valid */
	if (to_bkt->_slots[to_slot]._node != NULL) {
		__unlock_bucket(to_bkt);
		return EINVAL;
	}

	to_bkt->_inuse++;
	__slot_set(&to_bkt->_slots[to_slot], hash, node);
	h->_obj_retain(node);
	__unlock_bucket(to_bkt);

	OSAddAtomic(1, &h->_n_entries);

	cht_debug("hash %x node %p inserted at [%zu][%d]", hash, node,
	    __bucket_idx(h, to_bkt), to_slot);

	return 0;
}
696 
/*
 * Breadth-first search for a cuckoo path: starting at the primary
 * bucket, expand each occupied slot to its alternate bucket until a
 * bucket with a free slot is found, then ask cuckoo_move() to replay
 * the path.  The BFS itself runs lockless; cuckoo_move re-validates.
 * Returns 0 on insert, EEXIST for duplicates, ENOSPC when the search
 * space is exhausted (caller should expand), EAGAIN when every found
 * path was invalidated by racing writers.
 */
static int
cuckoo_probe(struct cuckoo_hashtable *h, struct cuckoo_node *node,
    uint32_t hash)
{
	struct _bfs_node queue[_CHT_BFS_QUEUE_LEN];
	uint8_t head, tail;
	struct _bucket *b;
	uint8_t avail_i;
	int ret = ENOMEM;

	/* probe starts from its primary bucket */
	queue[0].bkt_idx = hash & h->_bitmask;
	queue[0].prev_node_idx = _CHT_BFS_QUEUE_LEN;    /* marks the root */
	queue[0].prev_slot_idx = _CHT_SLOT_INVAL;

	head = 0;
	tail = 1;

	while (__probable(tail != head && tail < _CHT_BFS_QUEUE_END)) {
		b = __get_bucket(h, queue[head].bkt_idx);
		avail_i = _CHT_SLOT_INVAL;
		for (uint8_t i = 0; i < _CHT_BUCKET_SLOTS; i++) {
			struct _slot *s = __bucket_slot(b, i);
			if (__slot_empty(s)) {
				if (avail_i == _CHT_SLOT_INVAL) {
					avail_i = i;
				}
				continue;
			}

			/*
			 * Another node with same hash could have been probed
			 * into this bucket, chain to it.
			 */
			if (__improbable(s->_hash == hash)) {
				ASSERT(s->_node != NULL);
				ret = cuckoo_node_chain(s->_node, node);
				if (ret != 0) {
					goto done;
				}
				cht_debug("hash %x node %p inserted [%zu][%d]",
				    hash, node, __bucket_idx(h, b), i);
				OSAddAtomic(1, &h->_n_entries);
				h->_obj_retain(node);
				goto done;
			}

			/* enqueue this slot's alternate bucket as a child */
			queue[tail].bkt_idx = __alt_hash(s->_hash) & h->_bitmask;
			queue[tail].prev_node_idx = head;
			queue[tail].prev_slot_idx = i;
			tail++;
		}

		if (avail_i != _CHT_SLOT_INVAL) {
			ret = cuckoo_move(h, node, hash, queue, head, avail_i);
			if (ret == 0) {
				goto done;
			} else if (ret == EINVAL) {
				/* path raced away; keep searching from head+1 */
				cht_warn("cukoo path invalidated");
				goto skip;
			} else {
				cht_err("faild: unknown err %d", ret);
				goto done;
			}
		}
skip:
		head++;
	}

	if (tail == head || tail >= _CHT_BFS_QUEUE_END) {
		cht_warn("failed: cuckoo probe out of search space "
		    "head %d tail %d (%d/%d, load factor %d%%)", head, tail,
		    h->_n_entries, h->_capacity,
		    cuckoo_hashtable_load_factor(h));
		ret = ENOSPC;
	} else {
		cht_warn("failed: cuckoo probe path invalidated "
		    " (%d/%d, load factor %d%%)", h->_n_entries, h->_capacity,
		    cuckoo_hashtable_load_factor(h));
		ret = EAGAIN;
	}
done:
	return ret;
}
781 
/*
 * Invoke node_handler on every node in the table, including chained
 * full-collision nodes.  When wlocked is false the table read lock and
 * each bucket lock are taken; when true the caller already holds the
 * table write lock (resize path) and no further locking is done.  The
 * handler may safely relink the current node: its successor is fetched
 * before the callback runs.
 */
static inline void
__foreach_node(struct cuckoo_hashtable *h, bool wlocked,
    void (^node_handler)(struct cuckoo_node *, uint32_t hash))
{
	if (!wlocked) {
		__rlock_table(h);
	}
	for (uint32_t i = 0; i < h->_n_buckets; i++) {
		struct _bucket *b = &h->_buckets[i];
		if (b->_inuse == 0) {
			continue;       /* skip empty buckets */
		}
		if (!wlocked) {
			__lock_bucket(b);
		}
		for (uint32_t j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			struct _slot *s = __bucket_slot(b, j);
			struct cuckoo_node *node = NULL, *next_node = NULL;
			node = s->_node;
			while (node != NULL) {
				next_node = cuckoo_node_next(node);
				node_handler(node, s->_hash);
				node = next_node;
			}
		}
		if (!wlocked) {
			__unlock_bucket(b);
		}
	}
	if (!wlocked) {
		__unrlock_table(h);
	}
}
815 
/* Public iteration entry point; takes the table read lock internally. */
void
cuckoo_hashtable_foreach(struct cuckoo_hashtable *h,
    void (^node_handler)(struct cuckoo_node *, uint32_t hash))
{
	__foreach_node(h, false, node_handler);
}
822 
/* No-op retain used by the resize scratch table (objects already held). */
static void
cuckoo_dummy_retain(struct cuckoo_node *node)
{
#pragma unused(node)
}
828 
/* No-op release counterpart of cuckoo_dummy_retain. */
static void
cuckoo_dummy_release(struct cuckoo_node *node)
{
#pragma unused(node)
}
834 
/*
 * Double (expand) or halve (shrink) the bucket array.  Runs exclusively
 * under __resize_begin/__resize_end: a scratch table of the new size is
 * built, every node is re-added to it (with no-op retain/release so
 * refcounts are untouched), and the scratch table's bucket array is then
 * swapped into h.  Returns 0 on success, EAGAIN if a peer resized first,
 * ENOMEM if the new bucket array cannot be allocated.
 */
static int
cuckoo_resize(struct cuckoo_hashtable *h, enum cuckoo_resize_ops option)
{
	int ret = 0;

	/* backoff from concurrent expansion */
	do {
		ret = __resize_begin(h);
		if (ret == EAGAIN) {
			cht_info("resize done by peer");
			return EAGAIN;
		}
	} while (ret == EINTR);

	uint32_t curr_capacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	uint32_t curr_load = (100 * h->_n_entries) / curr_capacity;
	uint32_t curr_buckets = h->_n_buckets;
	uint32_t new_capacity;
	__block size_t add_called = 0;

	/* check load factor to ensure we are not hitting something else */
	if (option == _CHT_RESIZE_EXPAND) {
		if (curr_load < _CHT_MIN_LOAD_EXPAND) {
			cht_warn("Warning: early expand at %d load", curr_load);
		}
		new_capacity = curr_capacity * 2;
	} else {
		/* never shrink while loaded or below the requested capacity */
		if (curr_load > _CHT_MAX_LOAD_SHRINK ||
		    curr_capacity == h->_rcapacity) {
			goto done;
		}
		new_capacity = curr_capacity / 2;
	}

	cht_info("resize %d/(%d -> %d)", h->_n_entries,
	    curr_capacity, new_capacity);

	struct cuckoo_hashtable_params new_p = {
		.cht_capacity = new_capacity,
		.cht_obj_cmp = h->_obj_cmp,
		.cht_obj_retain = cuckoo_dummy_retain,
		.cht_obj_release = cuckoo_dummy_release,
	};
	struct cuckoo_hashtable *tmp_h;
	tmp_h = cuckoo_hashtable_create(&new_p);
	if (tmp_h == NULL) {
		ret = ENOMEM;
		goto done;
	}

	/* re-add every node into the scratch table (we hold the wlock) */
	__foreach_node(h, true, ^(struct cuckoo_node *node, uint32_t hash) {
		int error = 0;
		cuckoo_node_set_next(node, NULL);
		error = cuckoo_hashtable_add_with_hash(tmp_h, node, hash);
		ASSERT(error == 0);
		add_called++;
	});

	if (__improbable(cuckoo_hashtable_entries(h) !=
	    cuckoo_hashtable_entries(tmp_h))) {
		panic("h %zu add_called %zu tmp_h %zu",
		    cuckoo_hashtable_entries(h), add_called,
		    cuckoo_hashtable_entries(tmp_h));
	}

	/* swap the new bucket array into h and discard the old one */
	for (uint32_t i = 0; i < h->_n_buckets; i++) {
		lck_mtx_destroy(&h->_buckets[i]._lock, &cht_lock_group);
	}
	h->_n_buckets = tmp_h->_n_buckets;
	h->_capacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	h->_bitmask = tmp_h->_bitmask;
	sk_free_type_array(struct _bucket, curr_buckets, h->_buckets);

	h->_buckets = tmp_h->_buckets;
	lck_rw_destroy(&tmp_h->_resize_lock, &cht_lock_group);
	lck_mtx_destroy(&tmp_h->_lock, &cht_lock_group);
	sk_free_type(struct cuckoo_hashtable, tmp_h);

done:
	__resize_end(h);

	return ret;
}
918 
/*
 * Single add attempt without growing the table: try the primary bucket,
 * then the alternate, then a full cuckoo probe.  Returns 0 on success,
 * EEXIST for duplicates, ENOSPC/EAGAIN from the probe.
 */
static inline int
cuckoo_add_no_expand(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	struct _bucket *b1, *b2;
	int ret = -1;

	__rlock_table(h);

	b1 = __prim_bucket(h, hash);
	if ((ret = __add_to_bucket(h, b1, node, hash)) == 0) {
		goto done;
	}

	b2 = __alt_bucket(h, hash);
	if ((ret = __add_to_bucket(h, b2, node, hash)) == 0) {
		goto done;
	}

	ret = cuckoo_probe(h, node, hash);
done:
	__unrlock_table(h);
	return ret;
}
943 
/*
 * Public add: insert (node, hash), expanding the table and retrying for
 * as long as the failure is lack of space.  The node must be detached
 * (next == NULL).  On success the node is retained by the table.
 */
int
cuckoo_hashtable_add_with_hash(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	int ret;

	/* neutralize node to avoid non-terminating tail */
	ASSERT(cuckoo_node_next(node) == NULL);

	ret = cuckoo_add_no_expand(h, node, hash);
	if (ret == ENOSPC) {
		do {
			/* EAGAIN means a peer expanded for us; retry the add */
			ret = cuckoo_resize(h, _CHT_RESIZE_EXPAND);
			if (ret != 0 && ret != EAGAIN) {
				break;
			}
			// this could still fail, when other threads added
			// enough objs that another resize is needed
			ret = cuckoo_add_no_expand(h, node, hash);
		} while (ret == ENOSPC);
	}

	return ret;
}
968 
969 static inline int
__del_from_bucket(struct cuckoo_hashtable * h,struct _bucket * b,struct cuckoo_node * node,uint32_t hash)970 __del_from_bucket(struct cuckoo_hashtable *h, struct _bucket *b,
971     struct cuckoo_node *node, uint32_t hash)
972 {
973 	uint32_t i;
974 
975 	__lock_bucket(b);
976 	for (i = 0; i < _CHT_BUCKET_SLOTS; i++) {
977 		if (b->_slots[i]._hash == hash) {
978 			if (cuckoo_node_del(&b->_slots[i]._node, node)) {
979 				h->_obj_release(node);
980 				OSAddAtomic(-1, &h->_n_entries);
981 				if (__slot_empty(__bucket_slot(b, i))) {
982 					b->_slots[i]._hash = 0;
983 					b->_inuse--;
984 				}
985 				__unlock_bucket(b);
986 				return 0;
987 			}
988 		}
989 	}
990 	__unlock_bucket(b);
991 	return ENOENT;
992 }
993 
/*
 * Public delete entry point: remove a node that was inserted with the
 * given hash.  A node can only live in one of its two candidate
 * buckets, so both are tried in order.  On success the node's next
 * pointer is cleared so it can be safely re-inserted later.  Returns 0
 * on success, ENOENT if the node was not found.
 */
int
cuckoo_hashtable_del(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	int err;

	__rlock_table(h);

	err = __del_from_bucket(h, __prim_bucket(h, hash), node, hash);
	if (err != 0) {
		err = __del_from_bucket(h, __alt_bucket(h, hash), node, hash);
	}
	if (err == 0) {
		/* detach the node so a later re-insert starts clean */
		cuckoo_node_set_next(node, NULL);
	}

	__unrlock_table(h);
	return err;
}
1020 
1021 void
cuckoo_hashtable_try_shrink(struct cuckoo_hashtable * h)1022 cuckoo_hashtable_try_shrink(struct cuckoo_hashtable *h)
1023 {
1024 	cuckoo_resize(h, _CHT_RESIZE_SHRINK);
1025 }
1026 
1027 #if (DEVELOPMENT || DEBUG)
1028 
1029 static inline bool
cuckoo_node_looped(struct cuckoo_node * node)1030 cuckoo_node_looped(struct cuckoo_node *node)
1031 {
1032 	struct cuckoo_node *runner = node;
1033 
1034 	if (node == NULL) {
1035 		return false;
1036 	}
1037 
1038 	while (runner->next && runner->next->next) {
1039 		runner = runner->next->next;
1040 		node = node->next;
1041 
1042 		if (runner == node) {
1043 			return true;
1044 		}
1045 	}
1046 	return false;
1047 }
1048 
/*
 * Debug-only invariant checker.  Walks every slot chain of every bucket
 * under the exclusive table lock and panics on the first inconsistency:
 * a node stored in a bucket that is neither its primary nor alternate,
 * a looped chain, a stale per-bucket occupancy count, or a global entry
 * count that disagrees with the number of nodes actually reachable.
 * Returns 0 when healthy, -1 otherwise (the -1 path is only reachable
 * if panic() were non-fatal).
 */
int
cuckoo_hashtable_health_check(struct cuckoo_hashtable *h)
{
	uint32_t hash;
	uint32_t i, j;
	struct _bucket *b;
	struct cuckoo_node *node;
	bool healthy = true;
	uint32_t seen = 0;	/* nodes walked; must equal _n_entries */

	/* exclusive lock: freezes the table for a consistent snapshot */
	__wlock_table(h);

	for (i = 0; i < h->_n_buckets; i++) {
		b = &h->_buckets[i];
		uint8_t inuse = 0;	/* recomputed occupancy for this bucket */
		for (j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			hash = b->_slots[j]._hash;
			node = b->_slots[j]._node;
			if (node != NULL) {
				inuse++;
			}
			while (node != NULL) {
				seen++;
				/* node must map to this bucket as primary or alternate */
				if ((__prim_bucket(h, hash) != b) &&
				    (__alt_bucket(h, hash) != b)) {
					panic("[%d][%d] stray hash %x node %p",
					    i, j, hash, node);
					healthy = false;
				}

				/* chains must be NULL-terminated, never circular */
				if (cuckoo_node_looped(node)) {
					panic("[%d][%d] looped hash %x node %p",
					    i, j, hash, node);
					healthy = false;
				}
				node = cuckoo_node_next(node);
			}
		}
		/* cached per-bucket count must match what we just counted */
		ASSERT(inuse == b->_inuse);
	}

	/* global entry count must equal the sum over all chains */
	if (seen != h->_n_entries) {
		panic("seen %d != n_entries %d", seen, h->_n_entries);
	}

	__unwlock_table(h);

	if (!healthy) {
		cht_err("table unhealthy");
		return -1;
	} else {
		return 0;
	}
}
1103 
1104 void
cuckoo_hashtable_dump(struct cuckoo_hashtable * h)1105 cuckoo_hashtable_dump(struct cuckoo_hashtable *h)
1106 {
1107 	uint32_t hash;
1108 	struct cuckoo_node *node;
1109 	uint32_t i, j;
1110 	struct _bucket *b;
1111 
1112 	cuckoo_hashtable_health_check(h);
1113 
1114 	for (i = 0; i < h->_n_buckets; i++) {
1115 		printf("%d\t", i);
1116 		b = &h->_buckets[i];
1117 		for (j = 0; j < _CHT_BUCKET_SLOTS; j++) {
1118 			hash = b->_slots[j]._hash;
1119 			node = b->_slots[j]._node;
1120 			printf("0x%08x(%p) ", hash, node);
1121 		}
1122 		printf("\n");
1123 	}
1124 }
#endif /* DEVELOPMENT || DEBUG */
1126