xref: /xnu-8792.61.2/libkern/kxld/kxld_dict.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <string.h>
29 #include <sys/types.h>
30 
31 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
32 #include <AssertMacros.h>
33 
34 #include "kxld_dict.h"
35 #include "kxld_util.h"
36 
37 /*******************************************************************************
38 * Types and macros
39 *******************************************************************************/
40 
41 /* Ratio of num_entries:num_buckets that will cause a resize */
42 #define RESIZE_NUMER 7
43 #define RESIZE_DENOM 10
44 #define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM)
45 #define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER)
46 
47 /* Selected for good scaling qualities when resizing dictionary
48  * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm
49  */
50 #define DEFAULT_DICT_SIZE 89
51 
52 typedef struct dict_entry DictEntry;
53 
54 typedef enum {
55 	EMPTY = 0,
56 	USED = 1,
57 	DELETED = 2
58 } DictEntryState;
59 
60 struct dict_entry {
61 	const void *key;
62 	void *value;
63 	DictEntryState state;
64 };
65 
66 /*******************************************************************************
67 * Function prototypes
68 *******************************************************************************/
69 
70 static kern_return_t get_locate_index(const KXLDDict *dict, const void *key,
71     u_int *idx);
72 static kern_return_t get_insert_index(const KXLDDict *dict, const void *key,
73     u_int *idx);
74 static kern_return_t resize_dict(KXLDDict *dict);
75 
76 /*******************************************************************************
77 *******************************************************************************/
78 kern_return_t
kxld_dict_init(KXLDDict * dict,kxld_dict_hash hash,kxld_dict_cmp cmp,u_int num_entries)79 kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp,
80     u_int num_entries)
81 {
82 	kern_return_t rval = KERN_FAILURE;
83 	u_int min_buckets = MIN_BUCKETS(num_entries);
84 	u_int num_buckets = DEFAULT_DICT_SIZE;
85 
86 	check(dict);
87 	check(hash);
88 	check(cmp);
89 
90 	/* We want the number of allocated buckets to be at least twice that of the
91 	 * number to be inserted.
92 	 */
93 	while (min_buckets > num_buckets) {
94 		num_buckets *= 2;
95 		num_buckets++;
96 	}
97 
98 	/* Allocate enough buckets for the anticipated number of entries */
99 	rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets);
100 	require_noerr(rval, finish);
101 
102 	/* Initialize */
103 	dict->hash = hash;
104 	dict->cmp = cmp;
105 	dict->num_entries = 0;
106 	dict->resize_threshold = RESIZE_THRESHOLD(num_buckets);
107 
108 	rval = KERN_SUCCESS;
109 
110 finish:
111 	return rval;
112 }
113 
114 /*******************************************************************************
115 *******************************************************************************/
116 void
kxld_dict_clear(KXLDDict * dict)117 kxld_dict_clear(KXLDDict *dict)
118 {
119 	check(dict);
120 
121 	dict->hash = NULL;
122 	dict->cmp = NULL;
123 	dict->num_entries = 0;
124 	dict->resize_threshold = 0;
125 	kxld_array_clear(&dict->buckets);
126 	kxld_array_clear(&dict->resize_buckets);
127 }
128 
129 /*******************************************************************************
130 *******************************************************************************/
131 void
kxld_dict_iterator_init(KXLDDictIterator * iter,const KXLDDict * dict)132 kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict)
133 {
134 	check(iter);
135 	check(dict);
136 
137 	iter->idx = 0;
138 	iter->dict = dict;
139 }
140 
141 /*******************************************************************************
142 *******************************************************************************/
143 void
kxld_dict_deinit(KXLDDict * dict)144 kxld_dict_deinit(KXLDDict *dict)
145 {
146 	check(dict);
147 
148 	kxld_array_deinit(&dict->buckets);
149 	kxld_array_deinit(&dict->resize_buckets);
150 }
151 
152 /*******************************************************************************
153 *******************************************************************************/
154 u_int
kxld_dict_get_num_entries(const KXLDDict * dict)155 kxld_dict_get_num_entries(const KXLDDict *dict)
156 {
157 	check(dict);
158 
159 	return dict->num_entries;
160 }
161 
162 /*******************************************************************************
163 *******************************************************************************/
164 void *
kxld_dict_find(const KXLDDict * dict,const void * key)165 kxld_dict_find(const KXLDDict *dict, const void *key)
166 {
167 	kern_return_t rval = KERN_FAILURE;
168 	DictEntry *entry = NULL;
169 	u_int idx = 0;
170 
171 	check(dict);
172 	check(key);
173 
174 	rval = get_locate_index(dict, key, &idx);
175 	if (rval) {
176 		return NULL;
177 	}
178 
179 	entry = kxld_array_get_item(&dict->buckets, idx);
180 
181 	return entry->value;
182 }
183 
184 /*******************************************************************************
185  * This dictionary uses linear probing, which means that when there is a
186  * collision, we just walk along the buckets until a free bucket shows up.
187  * A consequence of this is that when looking up an item, items that lie between
188  * its hash value and its actual bucket may have been deleted since it was
189  * inserted.  Thus, we should only stop a lookup when we've wrapped around the
190  * dictionary or encountered an EMPTY bucket.
191  ********************************************************************************/
192 static kern_return_t
get_locate_index(const KXLDDict * dict,const void * key,u_int * _idx)193 get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx)
194 {
195 	kern_return_t rval = KERN_FAILURE;
196 	DictEntry *entry = NULL;
197 	u_int base, idx;
198 
199 	base = idx = dict->hash(dict, key);
200 
201 	/* Iterate until we match the key, wrap, or hit an empty bucket */
202 	entry = kxld_array_get_item(&dict->buckets, idx);
203 	while (!dict->cmp(entry->key, key)) {
204 		if (entry->state == EMPTY) {
205 			goto finish;
206 		}
207 
208 		idx = (idx + 1) % dict->buckets.nitems;
209 		if (idx == base) {
210 			goto finish;
211 		}
212 
213 		entry = kxld_array_get_item(&dict->buckets, idx);
214 	}
215 
216 	check(idx < dict->buckets.nitems);
217 
218 	*_idx = idx;
219 	rval = KERN_SUCCESS;
220 
221 finish:
222 	return rval;
223 }
224 
225 /*******************************************************************************
226 *******************************************************************************/
227 kern_return_t
kxld_dict_insert(KXLDDict * dict,const void * key,void * value)228 kxld_dict_insert(KXLDDict *dict, const void *key, void *value)
229 {
230 	kern_return_t rval = KERN_FAILURE;
231 	DictEntry *entry = NULL;
232 	u_int idx = 0;
233 
234 	check(dict);
235 	check(key);
236 	check(value);
237 
238 	/* Resize if we are greater than the capacity threshold.
239 	 * Note: this is expensive, but the dictionary can be sized correctly at
240 	 * construction to avoid ever having to do this.
241 	 */
242 	while (dict->num_entries > dict->resize_threshold) {
243 		rval = resize_dict(dict);
244 		require_noerr(rval, finish);
245 	}
246 
247 	/* If this function returns FULL after we've already resized appropriately
248 	 * something is very wrong and we should return an error.
249 	 */
250 	rval = get_insert_index(dict, key, &idx);
251 	require_noerr(rval, finish);
252 
253 	/* Insert the new key-value pair into the bucket, but only count it as a
254 	 * new entry if we are not overwriting an existing entry.
255 	 */
256 	entry = kxld_array_get_item(&dict->buckets, idx);
257 	if (entry->state != USED) {
258 		dict->num_entries++;
259 		entry->key = key;
260 		entry->state = USED;
261 	}
262 	entry->value = value;
263 
264 	rval = KERN_SUCCESS;
265 
266 finish:
267 	return rval;
268 }
269 
270 /*******************************************************************************
271 * Increases the hash table's capacity by 2N+1.  Uses dictionary API.  Not
272 * fast; just correct.
273 *******************************************************************************/
274 static kern_return_t
resize_dict(KXLDDict * dict)275 resize_dict(KXLDDict *dict)
276 {
277 	kern_return_t rval = KERN_FAILURE;
278 	KXLDArray tmparray;
279 	DictEntry *entry = NULL;
280 	u_int nbuckets = (dict->buckets.nitems * 2 + 1);
281 	u_int i = 0;
282 
283 	check(dict);
284 
285 	/* Initialize a new set of buckets to hold more entries */
286 	rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets);
287 	require_noerr(rval, finish);
288 
289 	/* Swap the new buckets with the old buckets */
290 	tmparray = dict->buckets;
291 	dict->buckets = dict->resize_buckets;
292 	dict->resize_buckets = tmparray;
293 
294 	/* Reset dictionary parameters */
295 	dict->num_entries = 0;
296 	dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems);
297 
298 	/* Rehash all of the entries */
299 	for (i = 0; i < dict->resize_buckets.nitems; ++i) {
300 		entry = kxld_array_get_item(&dict->resize_buckets, i);
301 		if (entry->state == USED) {
302 			rval = kxld_dict_insert(dict, entry->key, entry->value);
303 			require_noerr(rval, finish);
304 		}
305 	}
306 
307 	/* Clear the old buckets */
308 	kxld_array_clear(&dict->resize_buckets);
309 
310 	rval = KERN_SUCCESS;
311 
312 finish:
313 	return rval;
314 }
315 
316 /*******************************************************************************
317 * Simple function to find the first empty cell
318 *******************************************************************************/
319 static kern_return_t
get_insert_index(const KXLDDict * dict,const void * key,u_int * r_index)320 get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index)
321 {
322 	kern_return_t rval = KERN_FAILURE;
323 	DictEntry *entry = NULL;
324 	u_int base, idx;
325 
326 	base = idx = dict->hash(dict, key);
327 
328 	/* Iterate through the buckets until we find an EMPTY bucket, a DELETED
329 	 * bucket, or a key match.
330 	 */
331 	entry = kxld_array_get_item(&dict->buckets, idx);
332 	while (entry->state == USED && !dict->cmp(entry->key, key)) {
333 		idx = (idx + 1) % dict->buckets.nitems;
334 		require_action(base != idx, finish, rval = KERN_FAILURE);
335 		entry = kxld_array_get_item(&dict->buckets, idx);
336 	}
337 
338 	*r_index = idx;
339 	rval = KERN_SUCCESS;
340 
341 finish:
342 	return rval;
343 }
344 
345 /*******************************************************************************
346 *******************************************************************************/
347 void
kxld_dict_remove(KXLDDict * dict,const void * key,void ** value)348 kxld_dict_remove(KXLDDict *dict, const void *key, void **value)
349 {
350 	kern_return_t rval = KERN_FAILURE;
351 	DictEntry *entry = NULL;
352 	u_int idx = 0;
353 
354 	check(dict);
355 	check(key);
356 
357 	/* Find the item */
358 	rval = get_locate_index(dict, key, &idx);
359 	if (rval) {
360 		if (value) {
361 			*value = NULL;
362 		}
363 		return;
364 	}
365 
366 	entry = kxld_array_get_item(&dict->buckets, idx);
367 
368 	/* Save the value if requested */
369 	if (value) {
370 		*value = entry->value;
371 	}
372 
373 	/* Delete the item from the dictionary */
374 	entry->key = NULL;
375 	entry->value = NULL;
376 	entry->state = DELETED;
377 	dict->num_entries--;
378 }
379 
380 /*******************************************************************************
381 *******************************************************************************/
382 void
kxld_dict_iterator_get_next(KXLDDictIterator * iter,const void ** key,void ** value)383 kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key,
384     void **value)
385 {
386 	DictEntry *entry = NULL;
387 
388 	check(iter);
389 	check(key);
390 	check(value);
391 
392 	*key = NULL;
393 	*value = NULL;
394 
395 	/* Walk over the dictionary looking for USED buckets */
396 	for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) {
397 		entry = kxld_array_get_item(&iter->dict->buckets, iter->idx);
398 		if (entry->state == USED) {
399 			*key = entry->key;
400 			*value = entry->value;
401 			++(iter->idx);
402 			break;
403 		}
404 	}
405 }
406 
407 /*******************************************************************************
408 *******************************************************************************/
409 void
kxld_dict_iterator_reset(KXLDDictIterator * iter)410 kxld_dict_iterator_reset(KXLDDictIterator *iter)
411 {
412 	iter->idx = 0;
413 }
414 
415 /*******************************************************************************
416 * This is Daniel Bernstein's hash algorithm from comp.lang.c
417 * It's fast and distributes well.  Returns an idx into the symbol hash table.
418 * NOTE: Will not check for a valid pointer - performance
419 *******************************************************************************/
420 u_int
kxld_dict_string_hash(const KXLDDict * dict,const void * _key)421 kxld_dict_string_hash(const KXLDDict *dict, const void *_key)
422 {
423 	const char *key = _key;
424 	u_int c = 0;
425 	u_int hash_val = 5381;
426 
427 	check(dict);
428 	check(_key);
429 
430 	while ((c = *key++)) {
431 		/* hash(i) = hash(i-1) *33 ^ name[i] */
432 		hash_val = ((hash_val << 5) + hash_val) ^ c;
433 	}
434 
435 	return hash_val % dict->buckets.nitems;
436 }
437 
438 u_int
kxld_dict_uint32_hash(const KXLDDict * dict,const void * _key)439 kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key)
440 {
441 	uint32_t key = *(const uint32_t *) _key;
442 
443 	check(_key);
444 
445 	return (u_int) (key % dict->buckets.nitems);
446 }
447 
448 u_int
kxld_dict_kxldaddr_hash(const KXLDDict * dict,const void * _key)449 kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key)
450 {
451 	kxld_addr_t key = *(const kxld_addr_t *) _key;
452 
453 	check(_key);
454 
455 	return (u_int) (key % dict->buckets.nitems);
456 }
457 
458 u_int
kxld_dict_string_cmp(const void * key1,const void * key2)459 kxld_dict_string_cmp(const void *key1, const void *key2)
460 {
461 	return streq(key1, key2);
462 }
463 
464 u_int
kxld_dict_uint32_cmp(const void * key1,const void * key2)465 kxld_dict_uint32_cmp(const void *key1, const void *key2)
466 {
467 	const uint32_t *a = key1;
468 	const uint32_t *b = key2;
469 
470 	return a && b && (*a == *b);
471 }
472 
473 u_int
kxld_dict_kxldaddr_cmp(const void * key1,const void * key2)474 kxld_dict_kxldaddr_cmp(const void *key1, const void *key2)
475 {
476 	const kxld_addr_t *a = key1;
477 	const kxld_addr_t *b = key2;
478 
479 	return a && b && (*a == *b);
480 }
481