1 | /*
|
---|
2 | * Copyright (c) 2008 Jakub Jermar
|
---|
3 | * Copyright (c) 2012 Adam Hraska
|
---|
4 | *
|
---|
5 | * All rights reserved.
|
---|
6 | *
|
---|
7 | * Redistribution and use in source and binary forms, with or without
|
---|
8 | * modification, are permitted provided that the following conditions
|
---|
9 | * are met:
|
---|
10 | *
|
---|
11 | * - Redistributions of source code must retain the above copyright
|
---|
12 | * notice, this list of conditions and the following disclaimer.
|
---|
13 | * - Redistributions in binary form must reproduce the above copyright
|
---|
14 | * notice, this list of conditions and the following disclaimer in the
|
---|
15 | * documentation and/or other materials provided with the distribution.
|
---|
16 | * - The name of the author may not be used to endorse or promote products
|
---|
17 | * derived from this software without specific prior written permission.
|
---|
18 | *
|
---|
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
---|
20 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
---|
21 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
---|
22 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
---|
23 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
---|
24 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
---|
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
---|
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
---|
28 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
29 | */
|
---|
30 |
|
---|
31 | /** @addtogroup libc
|
---|
32 | * @{
|
---|
33 | */
|
---|
34 | /** @file
|
---|
35 | */
|
---|
36 |
|
---|
37 | /*
|
---|
38 | * This is an implementation of a generic resizable chained hash table.
|
---|
39 | *
|
---|
40 | * The table grows to 2*n+1 buckets each time, starting at n == 89,
|
---|
41 | * per Thomas Wang's recommendation:
|
---|
42 | * http://www.concentric.net/~Ttwang/tech/hashsize.htm
|
---|
43 | *
|
---|
44 | * This policy produces prime table sizes for the first five resizes
|
---|
45 | * and generally produces table sizes which are either prime or
|
---|
46 | * have fairly large (prime/odd) divisors. Having a prime table size
|
---|
47 | * mitigates the use of suboptimal hash functions and distributes
|
---|
48 | * items over the whole table.
|
---|
49 | */
|
---|
50 |
|
---|
51 | #include <adt/hash_table.h>
|
---|
52 | #include <adt/list.h>
|
---|
53 | #include <unistd.h>
|
---|
54 | #include <malloc.h>
|
---|
55 | #include <assert.h>
|
---|
56 | #include <str.h>
|
---|
57 |
|
---|
58 | /* Optimal initial bucket count. See comment above. */
|
---|
59 | #define HT_MIN_BUCKETS 89
|
---|
60 | /* The table is resized when the average load per bucket exceeds this number. */
|
---|
61 | #define HT_MAX_LOAD 2
|
---|
62 |
|
---|
63 |
|
---|
64 | static size_t round_up_size(size_t);
|
---|
65 | static bool alloc_table(size_t, list_t **);
|
---|
66 | static void clear_items(hash_table_t *);
|
---|
67 | static void resize(hash_table_t *, size_t);
|
---|
68 | static void grow_if_needed(hash_table_t *);
|
---|
69 | static void shrink_if_needed(hash_table_t *);
|
---|
70 |
|
---|
71 | /* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
|
---|
72 | static void nop_remove_callback(ht_link_t *item)
|
---|
73 | {
|
---|
74 | /* no-op */
|
---|
75 | }
|
---|
76 |
|
---|
77 |
|
---|
78 | /** Create chained hash table.
|
---|
79 | *
|
---|
80 | * @param h Hash table structure. Will be initialized by this call.
|
---|
81 | * @param init_size Initial desired number of hash table buckets. Pass zero
|
---|
82 | * if you want the default initial size.
|
---|
83 | * @param max_keys Maximal number of keys needed to identify an item.
|
---|
84 | * @param op Hash table operations structure. remove_callback()
|
---|
85 | * is optional and can be NULL if no action is to be taken
|
---|
86 | * upon removal. equal() is optional if and only if
|
---|
87 | * hash_table_insert_unique() will never be invoked.
|
---|
88 | * All other operations are mandatory.
|
---|
89 | *
|
---|
90 | * @return True on success
|
---|
91 | *
|
---|
92 | */
|
---|
93 | bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
|
---|
94 | hash_table_ops_t *op)
|
---|
95 | {
|
---|
96 | assert(h);
|
---|
97 | assert(op && op->hash && op->key_hash && op->key_equal);
|
---|
98 |
|
---|
99 | /* Check for compulsory ops. */
|
---|
100 | if (!op || !op->hash || !op->key_hash || !op->key_equal)
|
---|
101 | return false;
|
---|
102 |
|
---|
103 | h->bucket_cnt = round_up_size(init_size);
|
---|
104 |
|
---|
105 | if (!alloc_table(h->bucket_cnt, &h->bucket))
|
---|
106 | return false;
|
---|
107 |
|
---|
108 | h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
|
---|
109 | h->item_cnt = 0;
|
---|
110 | h->op = op;
|
---|
111 | h->full_item_cnt = h->max_load * h->bucket_cnt;
|
---|
112 | h->apply_ongoing = false;
|
---|
113 |
|
---|
114 | if (h->op->remove_callback == NULL) {
|
---|
115 | h->op->remove_callback = nop_remove_callback;
|
---|
116 | }
|
---|
117 |
|
---|
118 | return true;
|
---|
119 | }
|
---|
120 |
|
---|
121 | /** Destroy a hash table instance.
|
---|
122 | *
|
---|
123 | * @param h Hash table to be destroyed.
|
---|
124 | *
|
---|
125 | */
|
---|
126 | void hash_table_destroy(hash_table_t *h)
|
---|
127 | {
|
---|
128 | assert(h && h->bucket);
|
---|
129 | assert(!h->apply_ongoing);
|
---|
130 |
|
---|
131 | clear_items(h);
|
---|
132 |
|
---|
133 | free(h->bucket);
|
---|
134 |
|
---|
135 | h->bucket = 0;
|
---|
136 | h->bucket_cnt = 0;
|
---|
137 | }
|
---|
138 |
|
---|
139 | /** Returns true if there are no items in the table. */
|
---|
140 | bool hash_table_empty(hash_table_t *h)
|
---|
141 | {
|
---|
142 | assert(h && h->bucket);
|
---|
143 | return h->item_cnt == 0;
|
---|
144 | }
|
---|
145 |
|
---|
146 | /** Returns the number of items in the table. */
|
---|
147 | size_t hash_table_size(hash_table_t *h)
|
---|
148 | {
|
---|
149 | assert(h && h->bucket);
|
---|
150 | return h->item_cnt;
|
---|
151 | }
|
---|
152 |
|
---|
153 | /** Remove all elements from the hash table
|
---|
154 | *
|
---|
155 | * @param h Hash table to be cleared
|
---|
156 | */
|
---|
157 | void hash_table_clear(hash_table_t *h)
|
---|
158 | {
|
---|
159 | assert(h && h->bucket);
|
---|
160 | assert(!h->apply_ongoing);
|
---|
161 |
|
---|
162 | clear_items(h);
|
---|
163 |
|
---|
164 | /* Shrink the table to its minimum size if possible. */
|
---|
165 | if (HT_MIN_BUCKETS < h->bucket_cnt) {
|
---|
166 | resize(h, HT_MIN_BUCKETS);
|
---|
167 | }
|
---|
168 | }
|
---|
169 |
|
---|
170 | /** Unlinks and removes all items but does not resize. */
|
---|
171 | static void clear_items(hash_table_t *h)
|
---|
172 | {
|
---|
173 | if (h->item_cnt == 0)
|
---|
174 | return;
|
---|
175 |
|
---|
176 | for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
|
---|
177 | list_foreach_safe(h->bucket[idx], cur, next) {
|
---|
178 | assert(cur);
|
---|
179 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
180 |
|
---|
181 | list_remove(cur);
|
---|
182 | h->op->remove_callback(cur_link);
|
---|
183 | }
|
---|
184 | }
|
---|
185 |
|
---|
186 | h->item_cnt = 0;
|
---|
187 | }
|
---|
188 |
|
---|
189 | /** Insert item into a hash table.
|
---|
190 | *
|
---|
191 | * @param h Hash table.
|
---|
192 | * @param key Array of all keys necessary to compute hash index.
|
---|
193 | * @param item Item to be inserted into the hash table.
|
---|
194 | */
|
---|
195 | void hash_table_insert(hash_table_t *h, ht_link_t *item)
|
---|
196 | {
|
---|
197 | assert(item);
|
---|
198 | assert(h && h->bucket);
|
---|
199 | assert(!h->apply_ongoing);
|
---|
200 |
|
---|
201 | size_t idx = h->op->hash(item) % h->bucket_cnt;
|
---|
202 |
|
---|
203 | list_append(&item->link, &h->bucket[idx]);
|
---|
204 | ++h->item_cnt;
|
---|
205 | grow_if_needed(h);
|
---|
206 | }
|
---|
207 |
|
---|
208 |
|
---|
209 | /** Insert item into a hash table if not already present.
|
---|
210 | *
|
---|
211 | * @param h Hash table.
|
---|
212 | * @param key Array of all keys necessary to compute hash index.
|
---|
213 | * @param item Item to be inserted into the hash table.
|
---|
214 | *
|
---|
215 | * @return False if such an item had already been inserted.
|
---|
216 | * @return True if the inserted item was the only item with such a lookup key.
|
---|
217 | */
|
---|
218 | bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
|
---|
219 | {
|
---|
220 | assert(item);
|
---|
221 | assert(h && h->bucket && h->bucket_cnt);
|
---|
222 | assert(h->op && h->op->hash && h->op->equal);
|
---|
223 | assert(!h->apply_ongoing);
|
---|
224 |
|
---|
225 | size_t idx = h->op->hash(item) % h->bucket_cnt;
|
---|
226 |
|
---|
227 | /* Check for duplicates. */
|
---|
228 | list_foreach(h->bucket[idx], cur) {
|
---|
229 | /*
|
---|
230 | * We could filter out items using their hashes first, but
|
---|
231 | * calling equal() might very well be just as fast.
|
---|
232 | */
|
---|
233 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
234 | if (h->op->equal(cur_link, item))
|
---|
235 | return false;
|
---|
236 | }
|
---|
237 |
|
---|
238 | list_append(&item->link, &h->bucket[idx]);
|
---|
239 | ++h->item_cnt;
|
---|
240 | grow_if_needed(h);
|
---|
241 |
|
---|
242 | return true;
|
---|
243 | }
|
---|
244 |
|
---|
245 | /** Search hash table for an item matching keys.
|
---|
246 | *
|
---|
247 | * @param h Hash table.
|
---|
248 | * @param key Array of all keys needed to compute hash index.
|
---|
249 | *
|
---|
250 | * @return Matching item on success, NULL if there is no such item.
|
---|
251 | *
|
---|
252 | */
|
---|
253 | ht_link_t *hash_table_find(const hash_table_t *h, void *key)
|
---|
254 | {
|
---|
255 | assert(h && h->bucket);
|
---|
256 |
|
---|
257 | size_t idx = h->op->key_hash(key) % h->bucket_cnt;
|
---|
258 |
|
---|
259 | list_foreach(h->bucket[idx], cur) {
|
---|
260 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
261 | /*
|
---|
262 | * Is this is the item we are looking for? We could have first
|
---|
263 | * checked if the hashes match but op->key_equal() may very well be
|
---|
264 | * just as fast as op->hash().
|
---|
265 | */
|
---|
266 | if (h->op->key_equal(key, cur_link)) {
|
---|
267 | return cur_link;
|
---|
268 | }
|
---|
269 | }
|
---|
270 |
|
---|
271 | return NULL;
|
---|
272 | }
|
---|
273 |
|
---|
274 | /** Find the next item equal to item. */
|
---|
275 | ht_link_t *hash_table_find_next(const hash_table_t *h, ht_link_t *item)
|
---|
276 | {
|
---|
277 | assert(item);
|
---|
278 | assert(h && h->bucket);
|
---|
279 |
|
---|
280 | /* Traverse the circular list until we reach the starting item again. */
|
---|
281 | for (link_t *cur = item->link.next; cur != &item->link; cur = cur->next) {
|
---|
282 | assert(cur);
|
---|
283 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
284 | /*
|
---|
285 | * Is this is the item we are looking for? We could have first
|
---|
286 | * checked if the hashes match but op->equal() may very well be
|
---|
287 | * just as fast as op->hash().
|
---|
288 | */
|
---|
289 | if (h->op->equal(cur_link, item)) {
|
---|
290 | return cur_link;
|
---|
291 | }
|
---|
292 | }
|
---|
293 |
|
---|
294 | return NULL;
|
---|
295 | }
|
---|
296 |
|
---|
297 | /** Remove all matching items from hash table.
|
---|
298 | *
|
---|
299 | * For each removed item, h->remove_callback() is called.
|
---|
300 | *
|
---|
301 | * @param h Hash table.
|
---|
302 | * @param key Array of keys that will be compared against items of
|
---|
303 | * the hash table.
|
---|
304 | * @param keys Number of keys in the 'key' array.
|
---|
305 | *
|
---|
306 | * @return Returns the number of removed items.
|
---|
307 | */
|
---|
308 | size_t hash_table_remove(hash_table_t *h, void *key)
|
---|
309 | {
|
---|
310 | assert(h && h->bucket);
|
---|
311 | assert(!h->apply_ongoing);
|
---|
312 |
|
---|
313 | size_t idx = h->op->key_hash(key) % h->bucket_cnt;
|
---|
314 |
|
---|
315 | size_t removed = 0;
|
---|
316 |
|
---|
317 | list_foreach_safe(h->bucket[idx], cur, next) {
|
---|
318 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
319 |
|
---|
320 | if (h->op->key_equal(key, cur_link)) {
|
---|
321 | ++removed;
|
---|
322 | list_remove(cur);
|
---|
323 | h->op->remove_callback(cur_link);
|
---|
324 | }
|
---|
325 | }
|
---|
326 |
|
---|
327 | h->item_cnt -= removed;
|
---|
328 | shrink_if_needed(h);
|
---|
329 |
|
---|
330 | return removed;
|
---|
331 | }
|
---|
332 |
|
---|
333 | /** Removes an item already present in the table. The item must be in the table.*/
|
---|
334 | void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
|
---|
335 | {
|
---|
336 | assert(item);
|
---|
337 | assert(h && h->bucket);
|
---|
338 | assert(link_in_use(&item->link));
|
---|
339 |
|
---|
340 | list_remove(&item->link);
|
---|
341 | --h->item_cnt;
|
---|
342 | h->op->remove_callback(item);
|
---|
343 | shrink_if_needed(h);
|
---|
344 | }
|
---|
345 |
|
---|
346 | /** Apply function to all items in hash table.
|
---|
347 | *
|
---|
348 | * @param h Hash table.
|
---|
349 | * @param f Function to be applied. Return false if no more items
|
---|
350 | * should be visited. The functor may only delete the supplied
|
---|
351 | * item. It must not delete the successor of the item passed
|
---|
352 | * in the first argument.
|
---|
353 | * @param arg Argument to be passed to the function.
|
---|
354 | */
|
---|
355 | void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
|
---|
356 | {
|
---|
357 | assert(f);
|
---|
358 | assert(h && h->bucket);
|
---|
359 |
|
---|
360 | if (h->item_cnt == 0)
|
---|
361 | return;
|
---|
362 |
|
---|
363 | h->apply_ongoing = true;
|
---|
364 |
|
---|
365 | for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
|
---|
366 | list_foreach_safe(h->bucket[idx], cur, next) {
|
---|
367 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
368 | /*
|
---|
369 | * The next pointer had already been saved. f() may safely
|
---|
370 | * delete cur (but not next!).
|
---|
371 | */
|
---|
372 | if (!f(cur_link, arg))
|
---|
373 | return;
|
---|
374 | }
|
---|
375 | }
|
---|
376 |
|
---|
377 | h->apply_ongoing = false;
|
---|
378 |
|
---|
379 | shrink_if_needed(h);
|
---|
380 | grow_if_needed(h);
|
---|
381 | }
|
---|
382 |
|
---|
383 | /** Rounds up size to the nearest suitable table size. */
|
---|
384 | static size_t round_up_size(size_t size)
|
---|
385 | {
|
---|
386 | size_t rounded_size = HT_MIN_BUCKETS;
|
---|
387 |
|
---|
388 | while (rounded_size < size) {
|
---|
389 | rounded_size = 2 * rounded_size + 1;
|
---|
390 | }
|
---|
391 |
|
---|
392 | return rounded_size;
|
---|
393 | }
|
---|
394 |
|
---|
395 | /** Allocates and initializes the desired number of buckets. True if successful.*/
|
---|
396 | static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
|
---|
397 | {
|
---|
398 | assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
|
---|
399 |
|
---|
400 | list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
|
---|
401 | if (!buckets)
|
---|
402 | return false;
|
---|
403 |
|
---|
404 | for (size_t i = 0; i < bucket_cnt; i++)
|
---|
405 | list_initialize(&buckets[i]);
|
---|
406 |
|
---|
407 | *pbuckets = buckets;
|
---|
408 | return true;
|
---|
409 | }
|
---|
410 |
|
---|
411 |
|
---|
412 | /** Shrinks the table if the table is only sparely populated. */
|
---|
413 | static inline void shrink_if_needed(hash_table_t *h)
|
---|
414 | {
|
---|
415 | if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
|
---|
416 | /*
|
---|
417 | * Keep the bucket_cnt odd (possibly also prime).
|
---|
418 | * Shrink from 2n + 1 to n. Integer division discards the +1.
|
---|
419 | */
|
---|
420 | size_t new_bucket_cnt = h->bucket_cnt / 2;
|
---|
421 | resize(h, new_bucket_cnt);
|
---|
422 | }
|
---|
423 | }
|
---|
424 |
|
---|
425 | /** Grows the table if table load exceeds the maximum allowed. */
|
---|
426 | static inline void grow_if_needed(hash_table_t *h)
|
---|
427 | {
|
---|
428 | /* Grow the table if the average bucket load exceeds the maximum. */
|
---|
429 | if (h->full_item_cnt < h->item_cnt) {
|
---|
430 | /* Keep the bucket_cnt odd (possibly also prime). */
|
---|
431 | size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
|
---|
432 | resize(h, new_bucket_cnt);
|
---|
433 | }
|
---|
434 | }
|
---|
435 |
|
---|
436 | /** Allocates and rehashes items to a new table. Frees the old table. */
|
---|
437 | static void resize(hash_table_t *h, size_t new_bucket_cnt)
|
---|
438 | {
|
---|
439 | assert(h && h->bucket);
|
---|
440 | assert(HT_MIN_BUCKETS <= new_bucket_cnt);
|
---|
441 |
|
---|
442 | /* We are traversing the table and resizing would mess up the buckets. */
|
---|
443 | if (h->apply_ongoing)
|
---|
444 | return;
|
---|
445 |
|
---|
446 | list_t *new_buckets;
|
---|
447 |
|
---|
448 | /* Leave the table as is if we cannot resize. */
|
---|
449 | if (!alloc_table(new_bucket_cnt, &new_buckets))
|
---|
450 | return;
|
---|
451 |
|
---|
452 | if (0 < h->item_cnt) {
|
---|
453 | /* Rehash all the items to the new table. */
|
---|
454 | for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
|
---|
455 | list_foreach_safe(h->bucket[old_idx], cur, next) {
|
---|
456 | ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
|
---|
457 |
|
---|
458 | size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
|
---|
459 | list_remove(cur);
|
---|
460 | list_append(cur, &new_buckets[new_idx]);
|
---|
461 | }
|
---|
462 | }
|
---|
463 | }
|
---|
464 |
|
---|
465 | free(h->bucket);
|
---|
466 | h->bucket = new_buckets;
|
---|
467 | h->bucket_cnt = new_bucket_cnt;
|
---|
468 | h->full_item_cnt = h->max_load * h->bucket_cnt;
|
---|
469 | }
|
---|
470 |
|
---|
471 |
|
---|
472 | /** @}
|
---|
473 | */
|
---|