1 /*
2 * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <skywalk/os_skywalk_private.h>
29
30 #include "cuckoo_hashtable.h"
31
32 #define CUCKOO_TAG "com.apple.skywalk.libcuckoo"
33 SKMEM_TAG_DEFINE(cuckoo_tag, CUCKOO_TAG);
34
35
36 SYSCTL_NODE(_kern_skywalk, OID_AUTO, libcuckoo, CTLFLAG_RW | CTLFLAG_LOCKED,
37 0, "Skywalk Cuckoo Hashtable Library");
38
39 uint32_t cuckoo_verbose = 0;
40 #if (DEVELOPMENT || DEBUG)
41 SYSCTL_UINT(_kern_skywalk_libcuckoo, OID_AUTO, verbose,
42 CTLFLAG_RW | CTLFLAG_LOCKED, &cuckoo_verbose, 0, "");
43 #endif /* DEVELOPMENT || DEBUG */
44
/* verbosity levels for cht_log(); messages at or below cuckoo_verbose print */
typedef enum cht_verb {
	CHTV_ERR = 0,
	CHTV_WARN = 1,
	CHTV_INFO = 2,
	CHTV_DEBUG = 3,
} cht_verb_t;
51
52 static LCK_GRP_DECLARE(cht_lock_group, "CHT_LOCK");
53 static LCK_ATTR_DECLARE(cht_lock_attr, 0, 0);
54
55 #if SK_LOG
56 #define cht_log(level, _fmt, ...) \
57 do { \
58 if (level <= cuckoo_verbose) { \
59 kprintf("Cuckoo: thread %p %-30s " _fmt "\n", \
60 current_thread(), __FUNCTION__, ##__VA_ARGS__); \
61 } \
62 } while (0);
63 #else /* !SK_LOG */
64 #define cht_log(_flag, _fmt, ...) do { ((void)0); } while (0)
65 #endif /* !SK_LOG */
66
67 #define cht_err(_fmt, ...) cht_log(CHTV_ERR, _fmt, ##__VA_ARGS__)
68 #define cht_warn(_fmt, ...) cht_log(CHTV_WARN, _fmt, ##__VA_ARGS__)
69 #define cht_info(_fmt, ...) cht_log(CHTV_INFO, _fmt, ##__VA_ARGS__)
70 #define cht_debug(_fmt, ...) cht_log(CHTV_DEBUG, _fmt, ##__VA_ARGS__)
71
72 static inline int
cuckoo_node_chain(struct cuckoo_node * node,struct cuckoo_node * new_node)73 cuckoo_node_chain(struct cuckoo_node *node,
74 struct cuckoo_node *new_node)
75 {
76 struct cuckoo_node *prev_node = node;
77
78 /* new node must be zero initialized */
79 ASSERT(new_node->next == NULL);
80
81 /* use tail insert to check for duplicate along list */
82 while (__improbable(node != NULL)) {
83 if (node == new_node) {
84 return EEXIST;
85 }
86 prev_node = node;
87 node = node->next;
88 }
89
90 prev_node->next = new_node;
91
92 return 0;
93 }
94
95 static inline bool
cuckoo_node_del(struct cuckoo_node ** pnode,struct cuckoo_node * del_node)96 cuckoo_node_del(struct cuckoo_node **pnode,
97 struct cuckoo_node *del_node)
98 {
99 ASSERT(pnode != NULL);
100
101 struct cuckoo_node *node = *pnode;
102 while (node != NULL && node != del_node) {
103 pnode = &node->next;
104 node = node->next;
105 }
106 if (__probable(node != NULL)) {
107 *pnode = node->next;
108 node->next = NULL;
109 return true;
110 }
111
112 return false;
113 }
114
/* Set node's successor on the full-hash-collision chain. */
static inline void
cuckoo_node_set_next(struct cuckoo_node *node, struct cuckoo_node *next_node)
{
	node->next = next_node;
}
120
/* We probably won't add RCU soon so use simple pointer reference for now */
/* Return the next node on the collision chain, or NULL at the tail. */
static inline struct cuckoo_node *
cuckoo_node_next(struct cuckoo_node *node)
{
	return node->next;
}
127
128 #define _CHT_MAX_LOAD_SHRINK 40 /* at least below 40% load to shrink */
129 #define _CHT_MIN_LOAD_EXPAND 85 /* cuckoo could hold 85% full table */
130
/* direction argument for cuckoo_resize() */
enum cuckoo_resize_ops {
	_CHT_RESIZE_EXPAND = 0,
	_CHT_RESIZE_SHRINK = 1,
};
135
136 /*
137 * Following classic Cuckoo hash table design, cuckoo_hashtable use k hash
138 * functions to derive multiple candidate hash table bucket indexes.
139 * Here cuckoo_hashtable use k=2.
140 * prim_bkt_idx = bkt_idx[1] = hash[1](key) % N_BUCKETS
141 * alt_bkt_idx = bkt_idx[2] = hash[2](key) % N_BUCKETS
142 *
143 * Currently, we let the caller pass in the actual key's hash value, because
144 * in most of the use cases, caller probably have already calculated the hash
145 * value of actual key (e.g. using hardware offloading or copy+hash). This also
146 * save us from storing the key in the table (or any side data structure). So
147 *
148 * hash[1] = hash // hash(hash value) passed in from caller
149 * hash[2] = __alt_hash(hash[1])
150 *
151 * __alt_hash derives h2 using h1's high bits, since calculating primary
152 * bucket index uses its low bits. So alt_hash is still a uniformly distributed
153 * random variable (but not independent of h1, but is fine for hashtable usage).
154 *
 * There is an option to store h2 in the table bucket as well, but
 * cuckoo_hashtable does not do so, in order to reduce memory usage at the
 * small price of a few more cpu cycles during add/del operations. Assuming
 * the hashtable is read-heavy rather than write-heavy, this is reasonable.
159 *
160 * In the rare case of full hash value collision, where
161 * hash[1] == hash[1]'
162 * , there is no way for the hash table to differentiate two objects, thus we
163 * need to chain the fully collided objects under the same bucket slot.
164 * The caller need to walk the chain to explicitly compare the full length key
165 * to find the correct object.
166 *
167 * Reference Counting
168 * The hashtable assumes all objects are reference counted. It takes function
169 * pointers that retain and release the object.
170 * Adding to the table will call its retain function.
171 * Deleting from the table will call its release function.
172 *
173 */
174
/* hash might be zero, so always use _node == NULL to test empty slot */
struct _slot {
	uint32_t _hash;            /* caller-supplied 32-bit hash value */
	struct cuckoo_node *_node; /* head of full-hash-collision chain */
};
180
181 /*
182 * Cuckoo hashtable cache line awareness:
183 * - ARM platform has 128B CPU cache line.
184 * - Intel platform has 64B CPU cache line. However, hardware prefetcher
185 * treats cache lines as 128B chunk and prefetch the other 64B cache line.
186 *
187 * Thus cuckoo_hashtable use 128B as bucket size to make best use CPU cache
188 * resource.
189 */
#define _CHT_CACHELINE_CHUNK 128
#define _CHT_SLOT_INVAL UINT8_MAX
/* slots per bucket: whatever fits the cacheline chunk after lock + _inuse */
static const uint8_t _CHT_BUCKET_SLOTS =
    ((_CHT_CACHELINE_CHUNK - sizeof(lck_mtx_t) - sizeof(uint8_t)) /
    sizeof(struct _slot));
195
struct _bucket {
	struct _slot _slots[_CHT_BUCKET_SLOTS]; /* slot array */
	decl_lck_mtx_data(, _lock);             /* serializes slot access */
	uint8_t _inuse;                         /* count of occupied slots */
} __attribute__((aligned(_CHT_CACHELINE_CHUNK)));
201
struct cuckoo_hashtable {
	uint32_t _bitmask;     /* 1s' mask for quick MOD */
	uint32_t _n_buckets;   /* number of buckets (power of two) */

	volatile uint32_t _n_entries; /* number of entries in table */
	uint32_t _capacity;    /* max number of entries */
	uint32_t _rcapacity;   /* requested capacity (shrink floor) */

	bool _busy;            /* true while a resize is in flight */
	uint32_t _resize_waiters; /* threads sleeping in __resize_begin() */
	decl_lck_rw_data(, _resize_lock); /* table-wide reader/writer lock */
	decl_lck_mtx_data(, _lock); /* guards _busy and _resize_waiters */

	struct _bucket *_buckets; /* bucket array */

	/* caller-supplied object callbacks, see cuckoo_hashtable_create() */
	int (*_obj_cmp)(struct cuckoo_node *node, void *key);
	void (*_obj_retain)(struct cuckoo_node *);
	void (*_obj_release)(struct cuckoo_node *);
} __attribute__((aligned(_CHT_CACHELINE_CHUNK)));
221
222 static_assert(sizeof(struct _bucket) <= _CHT_CACHELINE_CHUNK);
223
224 static inline void
__slot_set(struct _slot * slt,uint32_t hash,struct cuckoo_node * node)225 __slot_set(struct _slot *slt, uint32_t hash, struct cuckoo_node *node)
226 {
227 slt->_hash = hash;
228 slt->_node = node;
229 }
230
231 static inline void
__slot_reset(struct _slot * slt)232 __slot_reset(struct _slot *slt)
233 {
234 slt->_hash = 0;
235 slt->_node = NULL;
236 }
237
238 static inline uint32_t
__alt_hash(uint32_t hash)239 __alt_hash(uint32_t hash)
240 {
241 #define _CHT_ALT_HASH_MIX 0x5bd1e995 /* Murmur hash mix */
242 uint32_t tag = hash >> 16;
243 uint32_t alt_hash = hash ^ ((tag + 1) * _CHT_ALT_HASH_MIX);
244 return alt_hash;
245 }
246
247 static inline struct _bucket *
__get_bucket(struct cuckoo_hashtable * h,uint32_t b_i)248 __get_bucket(struct cuckoo_hashtable *h, uint32_t b_i)
249 {
250 return &h->_buckets[b_i];
251 }
252
253 static inline struct _bucket *
__prim_bucket(struct cuckoo_hashtable * h,uint32_t hash)254 __prim_bucket(struct cuckoo_hashtable *h, uint32_t hash)
255 {
256 return __get_bucket(h, hash & h->_bitmask);
257 }
258
259 static inline struct _bucket *
__alt_bucket(struct cuckoo_hashtable * h,uint32_t hash)260 __alt_bucket(struct cuckoo_hashtable *h, uint32_t hash)
261 {
262 return __get_bucket(h, __alt_hash(hash) & h->_bitmask);
263 }
264
#if SK_LOG
/* Index of bucket b within h->_buckets (logging only). */
static inline size_t
__bucket_idx(struct cuckoo_hashtable *h, struct _bucket *b)
{
	return ((uintptr_t)b - (uintptr_t)&h->_buckets[0]) / sizeof(struct _bucket);
}
#endif /* SK_LOG */
272
273 static inline struct _slot *
__bucket_slot(struct _bucket * b,uint32_t slot_idx)274 __bucket_slot(struct _bucket *b, uint32_t slot_idx)
275 {
276 return &b->_slots[slot_idx];
277 }
278
279 static inline bool
__slot_empty(struct _slot * s)280 __slot_empty(struct _slot *s)
281 {
282 return s->_node == NULL;
283 }
284
285 static inline uint32_t
__align32pow2(uint32_t v)286 __align32pow2(uint32_t v)
287 {
288 v--;
289 v |= v >> 1;
290 v |= v >> 2;
291 v |= v >> 4;
292 v |= v >> 8;
293 v |= v >> 16;
294 v++;
295
296 return v;
297 }
298
/* Current load as a percentage of total slot capacity (racy snapshot). */
uint32_t
cuckoo_hashtable_load_factor(struct cuckoo_hashtable *h)
{
	return (100 * h->_n_entries) / (h->_n_buckets * _CHT_BUCKET_SLOTS);
}
304
305 /*
306 * Cuckoo hashtable uses regular mutex. Most operations(find/add) should
307 * finish faster than a context switch. It avoids using the spin lock since
308 * it might cause issues on certain platforms (e.g. x86_64) where the trap
309 * handler for dealing with FP/SIMD use would be invoked to perform thread-
310 * specific allocations; the use of FP/SIMD here is related to the memory
311 * compare with mask routines. Even in case of another thread holding a
312 * bucket lock and went asleep, cuckoo path search would try to find another
313 * path without blockers.
314 *
315 * The only exception is table expansion, which could take a long time, we use
316 * read/write lock to protect the whole table against any read/write in that
317 * case.
318 */
319
320 /* find/add only acquires table rlock, and serialize with bucket lock */
321 #define __lock_bucket(b) lck_mtx_lock(&b->_lock)
322 #define __unlock_bucket(b) lck_mtx_unlock(&b->_lock)
323
#define _CHT_DEADLOCK_THRESHOLD 20
/*
 * Try to take a bucket lock without blocking, giving up after
 * _CHT_DEADLOCK_THRESHOLD failed attempts.  Used on the cuckoo move
 * path where two bucket locks are held at once and overlapping writers
 * could otherwise deadlock.  Returns true when the lock was taken.
 */
static inline bool
__lock_bucket_with_backoff(struct _bucket *b)
{
	uint32_t try_counter = 0;
	while (!lck_mtx_try_lock(&b->_lock)) {
		if (try_counter++ > _CHT_DEADLOCK_THRESHOLD) {
			return false;
		}
	}
	return true;
}
336
337 #define __rlock_table(h) lck_rw_lock_shared(&h->_resize_lock)
338 #define __unrlock_table(h) lck_rw_unlock_shared(&h->_resize_lock)
339 #define __r2wlock_table(h) lck_rw_lock_shared_to_exclusive(&h->_resize_lock)
340 #define __wlock_table(h) lck_rw_lock_exclusive(&h->_resize_lock)
341 #define __unwlock_table(h) lck_rw_unlock_exclusive(&h->_resize_lock)
342
343 static inline int
__resize_begin(struct cuckoo_hashtable * h)344 __resize_begin(struct cuckoo_hashtable *h)
345 {
346 // takes care of concurrent resize
347 lck_mtx_lock(&h->_lock);
348 while (h->_busy) {
349 if (++(h->_resize_waiters) == 0) { /* wraparound */
350 h->_resize_waiters++;
351 }
352 int error = msleep(&h->_resize_waiters, &h->_lock,
353 (PZERO + 1), __FUNCTION__, NULL);
354 if (error == EINTR) {
355 cht_warn("resize waiter was interrupted");
356 ASSERT(h->_resize_waiters > 0);
357 h->_resize_waiters--;
358 lck_mtx_unlock(&h->_lock);
359 return EINTR;
360 }
361 // resizer finished
362 lck_mtx_unlock(&h->_lock);
363 return EAGAIN;
364 }
365
366 h->_busy = true;
367 lck_mtx_unlock(&h->_lock);
368
369 // takes other readers offline
370 __wlock_table(h);
371 return 0;
372 }
373
374 static inline void
__resize_end(struct cuckoo_hashtable * h)375 __resize_end(struct cuckoo_hashtable *h)
376 {
377 __unwlock_table(h);
378 lck_mtx_lock(&h->_lock);
379 h->_busy = false;
380 if (__improbable(h->_resize_waiters > 0)) {
381 h->_resize_waiters = 0;
382 wakeup(&h->_resize_waiters);
383 }
384 lck_mtx_unlock(&h->_lock);
385 }
386
387 struct cuckoo_hashtable *
cuckoo_hashtable_create(struct cuckoo_hashtable_params * p)388 cuckoo_hashtable_create(struct cuckoo_hashtable_params *p)
389 {
390 struct cuckoo_hashtable *h = NULL;
391 uint32_t n = 0;
392 uint32_t n_buckets = 0;
393 struct _bucket *buckets = NULL;
394 uint32_t i;
395
396 if (p->cht_capacity > CUCKOO_HASHTABLE_ENTRIES_MAX ||
397 p->cht_capacity < _CHT_BUCKET_SLOTS) {
398 return NULL;
399 }
400
401 ASSERT(p->cht_capacity < UINT32_MAX);
402 n = (uint32_t)p->cht_capacity;
403 h = sk_alloc_type(struct cuckoo_hashtable, Z_WAITOK | Z_NOFAIL, cuckoo_tag);
404
405 n_buckets = __align32pow2(n / _CHT_BUCKET_SLOTS);
406 buckets = sk_alloc_type_array(struct _bucket, n_buckets, Z_WAITOK, cuckoo_tag);
407 if (buckets == NULL) {
408 sk_free_type(struct cuckoo_hashtable, h);
409 return NULL;
410 }
411
412 for (i = 0; i < n_buckets; i++) {
413 lck_mtx_init(&buckets[i]._lock, &cht_lock_group, &cht_lock_attr);
414 }
415
416 lck_mtx_init(&h->_lock, &cht_lock_group, &cht_lock_attr);
417
418 h->_n_entries = 0;
419 h->_n_buckets = n_buckets;
420 h->_capacity = h->_rcapacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
421 h->_bitmask = n_buckets - 1;
422 h->_buckets = buckets;
423 lck_rw_init(&h->_resize_lock, &cht_lock_group, &cht_lock_attr);
424 h->_busy = false;
425 h->_resize_waiters = 0;
426
427 ASSERT(p->cht_obj_retain != NULL);
428 ASSERT(p->cht_obj_release != NULL);
429 ASSERT(p->cht_obj_cmp != NULL);
430 h->_obj_cmp = p->cht_obj_cmp;
431 h->_obj_retain = p->cht_obj_retain;
432 h->_obj_release = p->cht_obj_release;
433
434 return h;
435 }
436
437 void
cuckoo_hashtable_free(struct cuckoo_hashtable * h)438 cuckoo_hashtable_free(struct cuckoo_hashtable *h)
439 {
440 uint32_t i;
441
442 if (h == NULL) {
443 return;
444 }
445
446 ASSERT(h->_n_entries == 0);
447
448 if (h->_buckets != NULL) {
449 for (i = 0; i < h->_n_buckets; i++) {
450 lck_mtx_destroy(&h->_buckets[i]._lock, &cht_lock_group);
451 }
452 sk_free_type_array(struct _bucket, h->_n_buckets, h->_buckets);
453 }
454 sk_free_type(struct cuckoo_hashtable, h);
455 }
456
/* Number of entries currently stored (unsynchronized snapshot). */
size_t
cuckoo_hashtable_entries(struct cuckoo_hashtable *h)
{
	return h->_n_entries;
}
462
/* Total slot capacity of the current bucket array. */
size_t
cuckoo_hashtable_capacity(struct cuckoo_hashtable *h)
{
	return h->_n_buckets * _CHT_BUCKET_SLOTS;
}
468
/* Bytes used by the table header plus the bucket array (not the objects). */
size_t
cuckoo_hashtable_memory_footprint(struct cuckoo_hashtable *h)
{
	size_t total_meminuse = sizeof(struct cuckoo_hashtable) +
	    (h->_n_buckets * sizeof(struct _bucket));
	return total_meminuse;
}
476
477 static inline struct cuckoo_node *
__find_in_bucket(struct cuckoo_hashtable * h,struct _bucket * b,void * key,uint32_t hash)478 __find_in_bucket(struct cuckoo_hashtable *h, struct _bucket *b, void *key,
479 uint32_t hash)
480 {
481 uint32_t i;
482 struct cuckoo_node *node = NULL;
483
484 __lock_bucket(b);
485 if (b->_inuse == 0) {
486 goto done;
487 }
488 for (i = 0; i < _CHT_BUCKET_SLOTS; i++) {
489 if (b->_slots[i]._hash == hash) {
490 node = b->_slots[i]._node;
491 while (node != NULL) {
492 if (h->_obj_cmp(node, key) == 0) {
493 h->_obj_retain(node);
494 goto done;
495 }
496 node = cuckoo_node_next(node);
497 }
498 }
499 }
500
501 done:
502 __unlock_bucket(b);
503 return node;
504 }
505
/*
 * Look up key under the caller-computed hash.  The matching node, if
 * any, is returned retained; NULL means not found.  Holds the shared
 * table lock for the duration of the lookup.
 */
struct cuckoo_node *
cuckoo_hashtable_find_with_hash(struct cuckoo_hashtable *h, void *key,
    uint32_t hash)
{
	struct cuckoo_node *found;

	__rlock_table(h);

	/* check the primary bucket first, then fall back to the alternate */
	found = __find_in_bucket(h, __prim_bucket(h, hash), key, hash);
	if (found == NULL) {
		found = __find_in_bucket(h, __alt_bucket(h, hash), key, hash);
	}

	__unrlock_table(h);
	return found;
}
530
531 /*
532 * To add a key into cuckoo_hashtable:
533 * 1. First it searches the key's two candidate buckets b1, b2
534 * 2. If there are slots available in b1 or b2, we place the key there
535 * 3. Otherwise cuckoo_hashtable will have to probe and make space
536 *
 * To move keys around (open addressing hash table), cuckoo_hashtable needs to
 * first find an available slot via Cuckoo search. Here it uses breadth-first
 * search to find the shortest path towards an empty bucket slot.
540 *
541 */
542 static inline int
__add_to_bucket(struct cuckoo_hashtable * h,struct _bucket * b,struct cuckoo_node * node,uint32_t hash)543 __add_to_bucket(struct cuckoo_hashtable *h, struct _bucket *b,
544 struct cuckoo_node *node, uint32_t hash)
545 {
546 int ret = -1;
547 uint8_t avail_i = _CHT_SLOT_INVAL;
548
549 __lock_bucket(b);
550 if (b->_inuse == _CHT_BUCKET_SLOTS) {
551 goto done;
552 }
553 for (uint8_t i = 0; i < _CHT_BUCKET_SLOTS; i++) {
554 struct _slot *s = __bucket_slot(b, i);
555 if (__slot_empty(s)) {
556 if (avail_i == _CHT_SLOT_INVAL) {
557 avail_i = i;
558 }
559 } else {
560 /* chain to existing slot with same hash */
561 if (__improbable(s->_hash == hash)) {
562 ASSERT(s->_node != NULL);
563 ret = cuckoo_node_chain(s->_node, node);
564 if (ret != 0) {
565 goto done;
566 }
567 cht_debug("hash %x node %p inserted [%zu][%d]",
568 hash, node, __bucket_idx(h, b), i);
569 OSAddAtomic(1, &h->_n_entries);
570 h->_obj_retain(node);
571 goto done;
572 }
573 }
574 }
575 if (avail_i != _CHT_SLOT_INVAL) {
576 h->_obj_retain(node);
577 b->_slots[avail_i]._hash = hash;
578 b->_slots[avail_i]._node = node;
579 b->_inuse++;
580 cht_debug("hash %x node %p inserted [%zu][%d]", hash, node,
581 __bucket_idx(h, b), avail_i);
582 OSAddAtomic(1, &h->_n_entries);
583 ret = 0;
584 }
585 done:
586 __unlock_bucket(b);
587 return ret;
588 }
589
#define _CHT_BFS_QUEUE_LEN UINT8_MAX
/* stop enqueueing early so one full bucket of children still fits */
#define _CHT_BFS_QUEUE_END (_CHT_BFS_QUEUE_LEN - _CHT_BUCKET_SLOTS)

/* one node of the BFS tree over buckets explored by cuckoo_probe() */
struct _bfs_node {
	uint32_t bkt_idx;      /* bucket this BFS node stands for */
	uint8_t prev_node_idx; /* parent index; _CHT_BFS_QUEUE_LEN at root */
	uint8_t prev_slot_idx; /* slot in the parent bucket that led here */
};
598
599 /*
600 * Move slots backwards on cuckoo path
601 *
602 * cuckoo_move would hold at most 2 locks at any time, moving from
603 * the end of cuckoo path toward the bucket where new keys should be
604 * stored. There could be chances of dead lock in case of multiple
605 * writers have overlapping cuckoo path. We could arrange the order of
606 * locking to avoid that but then we have to take all locks upfront,
607 * which is not friendly to concurrent readers. So instead, we try to
608 * take one by one(but still at most 2 locks holding at any time),
609 * with backoff in mind.
610 */
/*
 * Shift entries one hop backwards along the BFS path recorded in queue,
 * from the free leaf slot (queue[leaf_node_idx], slot leaf_slot) toward
 * the root bucket, then install (hash, node) into the vacated root slot.
 * Returns 0 on success, EINVAL when a concurrent writer invalidated the
 * path or a probable deadlock was detected.
 */
static int
cuckoo_move(struct cuckoo_hashtable *h, struct cuckoo_node *node,
    uint32_t hash, struct _bfs_node *queue, uint8_t leaf_node_idx,
    uint8_t leaf_slot)
{
	struct _bfs_node *prev_node, *curr_node;
	struct _bucket *from_bkt, *to_bkt, *alt_bkt;
	uint8_t from_slot, to_slot;

	curr_node = &queue[leaf_node_idx];
	to_bkt = __get_bucket(h, curr_node->bkt_idx);
	to_slot = leaf_slot;

	__lock_bucket(to_bkt);

	/* walk leaf -> root; the root is marked by _CHT_BFS_QUEUE_LEN */
	while (__probable(curr_node->prev_node_idx != _CHT_BFS_QUEUE_LEN)) {
		prev_node = &queue[curr_node->prev_node_idx];
		from_bkt = __get_bucket(h, prev_node->bkt_idx);
		from_slot = curr_node->prev_slot_idx;

		if (!__lock_bucket_with_backoff(from_bkt)) {
			/* a dead lock or a sleeping-thread holding the lock */
			__unlock_bucket(to_bkt);
			cht_warn("cuckoo move deadlock detected");
			return EINVAL;
		}

		/*
		 * Verify cuckoo path by checking:
		 * 1. from_bkt[from_slot]'s alternative bucket is still to_bkt
		 * 2. to_bkt[to_slot] is still vacant
		 */
		alt_bkt = __alt_bucket(h, from_bkt->_slots[from_slot]._hash);
		if (alt_bkt != to_bkt ||
		    !__slot_empty(__bucket_slot(to_bkt, to_slot))) {
			__unlock_bucket(from_bkt);
			__unlock_bucket(to_bkt);
			cht_warn("cuckoo move path invalid: %s %s",
			    alt_bkt != to_bkt ? "alt_bkt != to_bkt" : "",
			    !__slot_empty(__bucket_slot(to_bkt, to_slot)) ?
			    "!slot_empty(to_bkt, to_slot)" : "");
			return EINVAL;
		}

		cht_log(CHTV_DEBUG, "Move [0x%lx][%d] to [0x%lx][%d]",
		    from_bkt - h->_buckets, from_slot, to_bkt - h->_buckets,
		    to_slot);

		ASSERT(to_bkt->_slots[to_slot]._node == NULL);
		ASSERT(to_bkt->_slots[to_slot]._hash == 0);

		/* move entry backward */
		to_bkt->_slots[to_slot] = from_bkt->_slots[from_slot];
		to_bkt->_inuse++;
		__slot_reset(&from_bkt->_slots[from_slot]);
		from_bkt->_inuse--;

		/* keep from_bkt locked; it becomes the next to_bkt */
		__unlock_bucket(to_bkt);

		curr_node = prev_node;
		to_bkt = from_bkt;
		to_slot = from_slot;
	}

	ASSERT(curr_node->prev_node_idx == _CHT_BFS_QUEUE_LEN);
	ASSERT(curr_node->prev_slot_idx == _CHT_SLOT_INVAL);

	/* if root slot is no longer valid */
	if (to_bkt->_slots[to_slot]._node != NULL) {
		__unlock_bucket(to_bkt);
		return EINVAL;
	}

	to_bkt->_inuse++;
	__slot_set(&to_bkt->_slots[to_slot], hash, node);
	h->_obj_retain(node);
	__unlock_bucket(to_bkt);

	OSAddAtomic(1, &h->_n_entries);

	cht_debug("hash %x node %p inserted at [%zu][%d]", hash, node,
	    __bucket_idx(h, to_bkt), to_slot);

	return 0;
}
696
697 static int
cuckoo_probe(struct cuckoo_hashtable * h,struct cuckoo_node * node,uint32_t hash)698 cuckoo_probe(struct cuckoo_hashtable *h, struct cuckoo_node *node,
699 uint32_t hash)
700 {
701 struct _bfs_node queue[_CHT_BFS_QUEUE_LEN];
702 uint8_t head, tail;
703 struct _bucket *b;
704 uint8_t avail_i;
705 int ret = ENOMEM;
706
707 /* probe starts from its primary bucket */
708 queue[0].bkt_idx = hash & h->_bitmask;
709 queue[0].prev_node_idx = _CHT_BFS_QUEUE_LEN;
710 queue[0].prev_slot_idx = _CHT_SLOT_INVAL;
711
712 head = 0;
713 tail = 1;
714
715 while (__probable(tail != head && tail < _CHT_BFS_QUEUE_END)) {
716 b = __get_bucket(h, queue[head].bkt_idx);
717 avail_i = _CHT_SLOT_INVAL;
718 for (uint8_t i = 0; i < _CHT_BUCKET_SLOTS; i++) {
719 struct _slot *s = __bucket_slot(b, i);
720 if (__slot_empty(s)) {
721 if (avail_i == _CHT_SLOT_INVAL) {
722 avail_i = i;
723 }
724 continue;
725 }
726
727 /*
728 * Another node with same hash could have been probed
729 * into this bucket, chain to it.
730 */
731 if (__improbable(s->_hash == hash)) {
732 ASSERT(s->_node != NULL);
733 ret = cuckoo_node_chain(s->_node, node);
734 if (ret != 0) {
735 goto done;
736 }
737 cht_debug("hash %x node %p inserted [%zu][%d]",
738 hash, node, __bucket_idx(h, b), i);
739 OSAddAtomic(1, &h->_n_entries);
740 h->_obj_retain(node);
741 goto done;
742 }
743
744 queue[tail].bkt_idx = __alt_hash(s->_hash) & h->_bitmask;
745 queue[tail].prev_node_idx = head;
746 queue[tail].prev_slot_idx = i;
747 tail++;
748 }
749
750 if (avail_i != _CHT_SLOT_INVAL) {
751 ret = cuckoo_move(h, node, hash, queue, head, avail_i);
752 if (ret == 0) {
753 goto done;
754 } else if (ret == EINVAL) {
755 cht_warn("cukoo path invalidated");
756 goto skip;
757 } else {
758 cht_err("faild: unknown err %d", ret);
759 goto done;
760 }
761 }
762 skip:
763 head++;
764 }
765
766 if (tail == head || tail >= _CHT_BFS_QUEUE_END) {
767 cht_warn("failed: cuckoo probe out of search space "
768 "head %d tail %d (%d/%d, load factor %d%%)", head, tail,
769 h->_n_entries, h->_capacity,
770 cuckoo_hashtable_load_factor(h));
771 ret = ENOSPC;
772 } else {
773 cht_warn("failed: cuckoo probe path invalidated "
774 " (%d/%d, load factor %d%%)", h->_n_entries, h->_capacity,
775 cuckoo_hashtable_load_factor(h));
776 ret = EAGAIN;
777 }
778 done:
779 return ret;
780 }
781
/*
 * Invoke node_handler on every node in the table.  wlocked indicates
 * the caller already holds the table write lock (resize path); when
 * false, the shared table lock and per-bucket locks are taken here.
 * The handler may unlink the current node: its successor is fetched
 * before the handler runs.
 */
static inline void
__foreach_node(struct cuckoo_hashtable *h, bool wlocked,
    void (^node_handler)(struct cuckoo_node *, uint32_t hash))
{
	if (!wlocked) {
		__rlock_table(h);
	}
	for (uint32_t i = 0; i < h->_n_buckets; i++) {
		struct _bucket *b = &h->_buckets[i];
		if (b->_inuse == 0) {
			continue;
		}
		if (!wlocked) {
			__lock_bucket(b);
		}
		for (uint32_t j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			struct _slot *s = __bucket_slot(b, j);
			struct cuckoo_node *node = NULL, *next_node = NULL;
			node = s->_node;
			while (node != NULL) {
				/* grab next first: handler may unlink node */
				next_node = cuckoo_node_next(node);
				node_handler(node, s->_hash);
				node = next_node;
			}
		}
		if (!wlocked) {
			__unlock_bucket(b);
		}
	}
	if (!wlocked) {
		__unrlock_table(h);
	}
}
815
/* Public iteration entry point; acquires the table locks itself. */
void
cuckoo_hashtable_foreach(struct cuckoo_hashtable *h,
    void (^node_handler)(struct cuckoo_node *, uint32_t hash))
{
	__foreach_node(h, false, node_handler);
}
822
/* No-op retain used while rehashing into a temporary table. */
static void
cuckoo_dummy_retain(struct cuckoo_node *node)
{
#pragma unused(node)
}
828
/* No-op release used while rehashing into a temporary table. */
static void
cuckoo_dummy_release(struct cuckoo_node *node)
{
#pragma unused(node)
}
834
/*
 * Resize the table in the requested direction under the table write
 * lock.  Entries are rehashed into a temporary table built with dummy
 * retain/release callbacks (ownership moves, refcounts do not change);
 * the temporary table's bucket array is then stolen and the old one is
 * destroyed.  Returns 0, EAGAIN if a peer resized first, or ENOMEM.
 */
static int
cuckoo_resize(struct cuckoo_hashtable *h, enum cuckoo_resize_ops option)
{
	int ret = 0;

	/* backoff from concurrent expansion */
	do {
		ret = __resize_begin(h);
		if (ret == EAGAIN) {
			cht_info("resize done by peer");
			return EAGAIN;
		}
	} while (ret == EINTR);

	uint32_t curr_capacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	uint32_t curr_load = (100 * h->_n_entries) / curr_capacity;
	uint32_t curr_buckets = h->_n_buckets;
	uint32_t new_capacity;
	__block size_t add_called = 0;

	/* check load factor to ensure we are not hitting something else */
	if (option == _CHT_RESIZE_EXPAND) {
		if (curr_load < _CHT_MIN_LOAD_EXPAND) {
			cht_warn("Warning: early expand at %d load", curr_load);
		}
		new_capacity = curr_capacity * 2;
	} else {
		/* never shrink below the originally requested capacity */
		if (curr_load > _CHT_MAX_LOAD_SHRINK ||
		    curr_capacity == h->_rcapacity) {
			goto done;
		}
		new_capacity = curr_capacity / 2;
	}

	cht_info("resize %d/(%d -> %d)", h->_n_entries,
	    curr_capacity, new_capacity);

	/* dummy callbacks: the staging table borrows, never owns, objects */
	struct cuckoo_hashtable_params new_p = {
		.cht_capacity = new_capacity,
		.cht_obj_cmp = h->_obj_cmp,
		.cht_obj_retain = cuckoo_dummy_retain,
		.cht_obj_release = cuckoo_dummy_release,
	};
	struct cuckoo_hashtable *tmp_h;
	tmp_h = cuckoo_hashtable_create(&new_p);
	if (tmp_h == NULL) {
		ret = ENOMEM;
		goto done;
	}

	/* rehash every node into the staging table (write lock held) */
	__foreach_node(h, true, ^(struct cuckoo_node *node, uint32_t hash) {
		int error = 0;
		cuckoo_node_set_next(node, NULL);
		error = cuckoo_hashtable_add_with_hash(tmp_h, node, hash);
		ASSERT(error == 0);
		add_called++;
	});

	if (__improbable(cuckoo_hashtable_entries(h) !=
	    cuckoo_hashtable_entries(tmp_h))) {
		panic("h %zu add_called %zu tmp_h %zu",
		    cuckoo_hashtable_entries(h), add_called,
		    cuckoo_hashtable_entries(tmp_h));
	}

	/* steal tmp_h's bucket array, then tear down the old one */
	for (uint32_t i = 0; i < h->_n_buckets; i++) {
		lck_mtx_destroy(&h->_buckets[i]._lock, &cht_lock_group);
	}
	h->_n_buckets = tmp_h->_n_buckets;
	h->_capacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	h->_bitmask = tmp_h->_bitmask;
	sk_free_type_array(struct _bucket, curr_buckets, h->_buckets);

	h->_buckets = tmp_h->_buckets;
	lck_rw_destroy(&tmp_h->_resize_lock, &cht_lock_group);
	lck_mtx_destroy(&tmp_h->_lock, &cht_lock_group);
	sk_free_type(struct cuckoo_hashtable, tmp_h);

done:
	__resize_end(h);

	return ret;
}
918
919 static inline int
cuckoo_add_no_expand(struct cuckoo_hashtable * h,struct cuckoo_node * node,uint32_t hash)920 cuckoo_add_no_expand(struct cuckoo_hashtable *h,
921 struct cuckoo_node *node, uint32_t hash)
922 {
923 struct _bucket *b1, *b2;
924 int ret = -1;
925
926 __rlock_table(h);
927
928 b1 = __prim_bucket(h, hash);
929 if ((ret = __add_to_bucket(h, b1, node, hash)) == 0) {
930 goto done;
931 }
932
933 b2 = __alt_bucket(h, hash);
934 if ((ret = __add_to_bucket(h, b2, node, hash)) == 0) {
935 goto done;
936 }
937
938 ret = cuckoo_probe(h, node, hash);
939 done:
940 __unrlock_table(h);
941 return ret;
942 }
943
/*
 * Insert node under the caller-computed hash; the node is retained on
 * success.  Expands the table and retries as long as the probe reports
 * the table is full.  Returns 0 on success, EEXIST if already present,
 * or an error from probing/resizing.
 */
int
cuckoo_hashtable_add_with_hash(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	int ret;

	/* neutralize node to avoid non-terminating tail */
	ASSERT(cuckoo_node_next(node) == NULL);

	ret = cuckoo_add_no_expand(h, node, hash);
	if (ret == ENOSPC) {
		do {
			ret = cuckoo_resize(h, _CHT_RESIZE_EXPAND);
			if (ret != 0 && ret != EAGAIN) {
				break;
			}
			// this could still fail, when other threads added
			// enough objs that another resize is needed
			ret = cuckoo_add_no_expand(h, node, hash);
		} while (ret == ENOSPC);
	}

	return ret;
}
968
969 static inline int
__del_from_bucket(struct cuckoo_hashtable * h,struct _bucket * b,struct cuckoo_node * node,uint32_t hash)970 __del_from_bucket(struct cuckoo_hashtable *h, struct _bucket *b,
971 struct cuckoo_node *node, uint32_t hash)
972 {
973 uint32_t i;
974
975 __lock_bucket(b);
976 for (i = 0; i < _CHT_BUCKET_SLOTS; i++) {
977 if (b->_slots[i]._hash == hash) {
978 if (cuckoo_node_del(&b->_slots[i]._node, node)) {
979 h->_obj_release(node);
980 OSAddAtomic(-1, &h->_n_entries);
981 if (__slot_empty(__bucket_slot(b, i))) {
982 b->_slots[i]._hash = 0;
983 b->_inuse--;
984 }
985 __unlock_bucket(b);
986 return 0;
987 }
988 }
989 }
990 __unlock_bucket(b);
991 return ENOENT;
992 }
993
/*
 * Delete node (previously added under hash) from the table, releasing
 * the table's reference.  Returns 0 on success, ENOENT when the node
 * is in neither candidate bucket.
 */
int
cuckoo_hashtable_del(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	struct _bucket *b1, *b2;
	int ret = -1;

	__rlock_table(h);

	b1 = __prim_bucket(h, hash);
	if ((ret = __del_from_bucket(h, b1, node, hash)) == 0) {
		goto done;
	}

	b2 = __alt_bucket(h, hash);
	if ((ret = __del_from_bucket(h, b2, node, hash)) == 0) {
		goto done;
	}

done:
	if (ret == 0) {
		/* detach the node from any residual chain linkage */
		cuckoo_node_set_next(node, NULL);
	}
	__unrlock_table(h);
	return ret;
}
1020
/*
 * Opportunistically halve the table; cuckoo_resize() declines unless
 * the load is below _CHT_MAX_LOAD_SHRINK and above the original
 * requested capacity.
 */
void
cuckoo_hashtable_try_shrink(struct cuckoo_hashtable *h)
{
	cuckoo_resize(h, _CHT_RESIZE_SHRINK);
}
1026
1027 #if (DEVELOPMENT || DEBUG)
1028
1029 static inline bool
cuckoo_node_looped(struct cuckoo_node * node)1030 cuckoo_node_looped(struct cuckoo_node *node)
1031 {
1032 struct cuckoo_node *runner = node;
1033
1034 if (node == NULL) {
1035 return false;
1036 }
1037
1038 while (runner->next && runner->next->next) {
1039 runner = runner->next->next;
1040 node = node->next;
1041
1042 if (runner == node) {
1043 return true;
1044 }
1045 }
1046 return false;
1047 }
1048
/*
 * Walk the whole table under the write lock and verify invariants:
 * every node sits in one of its two candidate buckets, no collision
 * chain loops, per-bucket _inuse counts match, and the global entry
 * count matches.  Panics on the first violation (DEVELOPMENT/DEBUG
 * only); the healthy-flag return path is therefore effectively
 * unreachable after a failure.  Returns 0 when consistent.
 */
int
cuckoo_hashtable_health_check(struct cuckoo_hashtable *h)
{
	uint32_t hash;
	uint32_t i, j;
	struct _bucket *b;
	struct cuckoo_node *node;
	bool healthy = true;
	uint32_t seen = 0;

	__wlock_table(h);

	for (i = 0; i < h->_n_buckets; i++) {
		b = &h->_buckets[i];
		uint8_t inuse = 0;
		for (j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			hash = b->_slots[j]._hash;
			node = b->_slots[j]._node;
			if (node != NULL) {
				inuse++;
			}
			while (node != NULL) {
				seen++;
				/* node must live in one of its 2 buckets */
				if ((__prim_bucket(h, hash) != b) &&
				    (__alt_bucket(h, hash) != b)) {
					panic("[%d][%d] stray hash %x node %p",
					    i, j, hash, node);
					healthy = false;
				}

				if (cuckoo_node_looped(node)) {
					panic("[%d][%d] looped hash %x node %p",
					    i, j, hash, node);
					healthy = false;
				}
				node = cuckoo_node_next(node);
			}
		}
		ASSERT(inuse == b->_inuse);
	}

	if (seen != h->_n_entries) {
		panic("seen %d != n_entries %d", seen, h->_n_entries);
	}

	__unwlock_table(h);

	if (!healthy) {
		cht_err("table unhealthy");
		return -1;
	} else {
		return 0;
	}
}
1103
/*
 * Print every bucket's (hash, chain head) pairs to the console after a
 * consistency check.  Debug aid; not synchronized beyond the locks
 * taken by cuckoo_hashtable_health_check().
 */
void
cuckoo_hashtable_dump(struct cuckoo_hashtable *h)
{
	uint32_t hash;
	struct cuckoo_node *node;
	uint32_t i, j;
	struct _bucket *b;

	cuckoo_hashtable_health_check(h);

	for (i = 0; i < h->_n_buckets; i++) {
		printf("%d\t", i);
		b = &h->_buckets[i];
		for (j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			hash = b->_slots[j]._hash;
			node = b->_slots[j]._node;
			printf("0x%08x(%p) ", hash, node);
		}
		printf("\n");
	}
}
#endif /* DEVELOPMENT || DEBUG */
1126