source: mainline/uspace/lib/c/generic/adt/hash_table.c@ 61eb2ce2

ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 61eb2ce2 was 61eb2ce2, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 years ago

Make hash table operations immutable, because global mutable state is evil

  • Property mode set to 100644
File size: 12.5 KB
RevLine 
[ee7736e]1/*
[739d00a]2 * Copyright (c) 2008 Jakub Jermar
[062d900]3 * Copyright (c) 2012 Adam Hraska
[1b20da0]4 *
[ee7736e]5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
[fadd381]31/** @addtogroup libc
[b2951e2]32 * @{
33 */
34/** @file
35 */
36
/*
 * This is an implementation of a generic resizable chained hash table.
 *
 * The table grows to 2*n+1 buckets each time, starting at n == 89,
 * per Thomas Wang's recommendation:
 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
 *
 * This policy produces prime table sizes for the first five resizes
 * and generally produces table sizes which are either prime or
 * have fairly large (prime/odd) divisors. Having a prime table size
 * mitigates the use of suboptimal hash functions and distributes
 * items over the whole table.
 */
50
[d9c8c81]51#include <adt/hash_table.h>
52#include <adt/list.h>
[ee7736e]53#include <assert.h>
[36795edf]54#include <member.h>
[38d150e]55#include <stdlib.h>
[19f857a]56#include <str.h>
[ee7736e]57
[062d900]58/* Optimal initial bucket count. See comment above. */
59#define HT_MIN_BUCKETS 89
60/* The table is resized when the average load per bucket exceeds this number. */
61#define HT_MAX_LOAD 2
62
[97b199b1]63static size_t round_up_size(size_t);
64static bool alloc_table(size_t, list_t **);
65static void clear_items(hash_table_t *);
66static void resize(hash_table_t *, size_t);
67static void grow_if_needed(hash_table_t *);
68static void shrink_if_needed(hash_table_t *);
[062d900]69
70/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
71static void nop_remove_callback(ht_link_t *item)
72{
73 /* no-op */
74}
75
[ee7736e]76/** Create chained hash table.
77 *
[e1da7ec]78 * @param h Hash table structure. Will be initialized by this call.
[062d900]79 * @param init_size Initial desired number of hash table buckets. Pass zero
[1b20da0]80 * if you want the default initial size.
[220210c8]81 * @param max_load The table is resized when the average load per bucket
82 * exceeds this number. Pass zero if you want the default.
[062d900]83 * @param op Hash table operations structure. remove_callback()
84 * is optional and can be NULL if no action is to be taken
85 * upon removal. equal() is optional if and only if
86 * hash_table_insert_unique() will never be invoked.
[1b20da0]87 * All other operations are mandatory.
[e1da7ec]88 *
89 * @return True on success
90 *
[ee7736e]91 */
[062d900]92bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
[61eb2ce2]93 const hash_table_ops_t *op)
[ee7736e]94{
[4f34b6a]95 assert(h);
[062d900]96 assert(op && op->hash && op->key_hash && op->key_equal);
[a35b458]97
[062d900]98 /* Check for compulsory ops. */
99 if (!op || !op->hash || !op->key_hash || !op->key_equal)
[4f34b6a]100 return false;
[a35b458]101
[062d900]102 h->bucket_cnt = round_up_size(init_size);
[a35b458]103
[062d900]104 if (!alloc_table(h->bucket_cnt, &h->bucket))
105 return false;
[a35b458]106
[062d900]107 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
108 h->item_cnt = 0;
[ee7736e]109 h->op = op;
[062d900]110 h->full_item_cnt = h->max_load * h->bucket_cnt;
111 h->apply_ongoing = false;
112
[4f34b6a]113 return true;
[ee7736e]114}
115
[062d900]116/** Destroy a hash table instance.
117 *
118 * @param h Hash table to be destroyed.
119 *
120 */
121void hash_table_destroy(hash_table_t *h)
122{
123 assert(h && h->bucket);
124 assert(!h->apply_ongoing);
[a35b458]125
[062d900]126 clear_items(h);
[a35b458]127
[062d900]128 free(h->bucket);
129
[205832b]130 h->bucket = NULL;
[062d900]131 h->bucket_cnt = 0;
132}
133
134/** Returns true if there are no items in the table. */
135bool hash_table_empty(hash_table_t *h)
136{
137 assert(h && h->bucket);
138 return h->item_cnt == 0;
139}
140
141/** Returns the number of items in the table. */
142size_t hash_table_size(hash_table_t *h)
143{
144 assert(h && h->bucket);
145 return h->item_cnt;
146}
147
[892022a1]148/** Remove all elements from the hash table
149 *
150 * @param h Hash table to be cleared
151 */
152void hash_table_clear(hash_table_t *h)
153{
[062d900]154 assert(h && h->bucket);
155 assert(!h->apply_ongoing);
[a35b458]156
[062d900]157 clear_items(h);
[a35b458]158
[062d900]159 /* Shrink the table to its minimum size if possible. */
160 if (HT_MIN_BUCKETS < h->bucket_cnt) {
161 resize(h, HT_MIN_BUCKETS);
162 }
163}
164
165/** Unlinks and removes all items but does not resize. */
166static void clear_items(hash_table_t *h)
167{
168 if (h->item_cnt == 0)
169 return;
[a35b458]170
[61eb2ce2]171 void (*remove_cb)(ht_link_t *) = h->op->remove_callback ? h->op->remove_callback : nop_remove_callback;
172
[062d900]173 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
174 list_foreach_safe(h->bucket[idx], cur, next) {
175 assert(cur);
176 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[a35b458]177
[892022a1]178 list_remove(cur);
[61eb2ce2]179 remove_cb(cur_link);
[892022a1]180 }
181 }
[a35b458]182
[062d900]183 h->item_cnt = 0;
[892022a1]184}
185
[062d900]186/** Insert item into a hash table.
[e1da7ec]187 *
[062d900]188 * @param h Hash table.
189 * @param item Item to be inserted into the hash table.
[739d00a]190 */
[062d900]191void hash_table_insert(hash_table_t *h, ht_link_t *item)
[739d00a]192{
[062d900]193 assert(item);
194 assert(h && h->bucket);
195 assert(!h->apply_ongoing);
[a35b458]196
[062d900]197 size_t idx = h->op->hash(item) % h->bucket_cnt;
[a35b458]198
[062d900]199 list_append(&item->link, &h->bucket[idx]);
200 ++h->item_cnt;
201 grow_if_needed(h);
[739d00a]202}
203
[062d900]204/** Insert item into a hash table if not already present.
[739d00a]205 *
[e1da7ec]206 * @param h Hash table.
207 * @param item Item to be inserted into the hash table.
[1b20da0]208 *
209 * @return False if such an item had already been inserted.
[062d900]210 * @return True if the inserted item was the only item with such a lookup key.
[ee7736e]211 */
[062d900]212bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
[ee7736e]213{
[4f34b6a]214 assert(item);
[062d900]215 assert(h && h->bucket && h->bucket_cnt);
216 assert(h->op && h->op->hash && h->op->equal);
217 assert(!h->apply_ongoing);
[a35b458]218
[062d900]219 size_t idx = h->op->hash(item) % h->bucket_cnt;
[a35b458]220
[062d900]221 /* Check for duplicates. */
[feeac0d]222 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
[1b20da0]223 /*
224 * We could filter out items using their hashes first, but
[062d900]225 * calling equal() might very well be just as fast.
226 */
227 if (h->op->equal(cur_link, item))
228 return false;
229 }
[a35b458]230
[062d900]231 list_append(&item->link, &h->bucket[idx]);
232 ++h->item_cnt;
233 grow_if_needed(h);
[a35b458]234
[062d900]235 return true;
[ee7736e]236}
237
238/** Search hash table for an item matching keys.
239 *
[e1da7ec]240 * @param h Hash table.
241 * @param key Array of all keys needed to compute hash index.
242 *
243 * @return Matching item on success, NULL if there is no such item.
[ee7736e]244 *
245 */
[5e801dc]246ht_link_t *hash_table_find(const hash_table_t *h, const void *key)
[ee7736e]247{
[062d900]248 assert(h && h->bucket);
[a35b458]249
[062d900]250 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
251
[feeac0d]252 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
[1b20da0]253 /*
254 * Is this is the item we are looking for? We could have first
255 * checked if the hashes match but op->key_equal() may very well be
[062d900]256 * just as fast as op->hash().
257 */
258 if (h->op->key_equal(key, cur_link)) {
259 return cur_link;
[ee7736e]260 }
261 }
[a35b458]262
[ee7736e]263 return NULL;
264}
265
[062d900]266/** Find the next item equal to item. */
[30f1a25]267ht_link_t *
268hash_table_find_next(const hash_table_t *h, ht_link_t *first, ht_link_t *item)
[062d900]269{
270 assert(item);
271 assert(h && h->bucket);
272
[963037b0]273 size_t idx = h->op->hash(item) % h->bucket_cnt;
274
[062d900]275 /* Traverse the circular list until we reach the starting item again. */
[30f1a25]276 for (link_t *cur = item->link.next; cur != &first->link;
277 cur = cur->next) {
[062d900]278 assert(cur);
[963037b0]279
280 if (cur == &h->bucket[idx].head)
281 continue;
282
[062d900]283 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[1b20da0]284 /*
285 * Is this is the item we are looking for? We could have first
286 * checked if the hashes match but op->equal() may very well be
[062d900]287 * just as fast as op->hash().
288 */
289 if (h->op->equal(cur_link, item)) {
290 return cur_link;
291 }
292 }
293
294 return NULL;
295}
296
[ee7736e]297/** Remove all matching items from hash table.
298 *
299 * For each removed item, h->remove_callback() is called.
300 *
[e1da7ec]301 * @param h Hash table.
302 * @param key Array of keys that will be compared against items of
303 * the hash table.
[1b20da0]304 *
[062d900]305 * @return Returns the number of removed items.
[ee7736e]306 */
[5e801dc]307size_t hash_table_remove(hash_table_t *h, const void *key)
[ee7736e]308{
[062d900]309 assert(h && h->bucket);
310 assert(!h->apply_ongoing);
[a35b458]311
[062d900]312 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
313
314 size_t removed = 0;
[a35b458]315
[062d900]316 list_foreach_safe(h->bucket[idx], cur, next) {
317 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[a35b458]318
[062d900]319 if (h->op->key_equal(key, cur_link)) {
320 ++removed;
[ee7736e]321 list_remove(cur);
[61eb2ce2]322
323 if (h->op->remove_callback)
324 h->op->remove_callback(cur_link);
[ee7736e]325 }
326 }
[062d900]327
328 h->item_cnt -= removed;
329 shrink_if_needed(h);
[a35b458]330
[062d900]331 return removed;
332}
333
[d1582b50]334/** Removes an item already present in the table. The item must be in the table. */
[062d900]335void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
336{
337 assert(item);
338 assert(h && h->bucket);
339 assert(link_in_use(&item->link));
340
341 list_remove(&item->link);
342 --h->item_cnt;
[61eb2ce2]343
344 if (h->op->remove_callback)
345 h->op->remove_callback(item);
[062d900]346 shrink_if_needed(h);
[ee7736e]347}
[b2951e2]348
[1ab4aca]349/** Apply function to all items in hash table.
[203a090]350 *
[e1da7ec]351 * @param h Hash table.
[1b20da0]352 * @param f Function to be applied. Return false if no more items
[062d900]353 * should be visited. The functor may only delete the supplied
[1b20da0]354 * item. It must not delete the successor of the item passed
[062d900]355 * in the first argument.
[e1da7ec]356 * @param arg Argument to be passed to the function.
[203a090]357 */
[062d900]358void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
[1b20da0]359{
[062d900]360 assert(f);
361 assert(h && h->bucket);
[a35b458]362
[062d900]363 if (h->item_cnt == 0)
364 return;
[a35b458]365
[062d900]366 h->apply_ongoing = true;
[a35b458]367
[062d900]368 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
369 list_foreach_safe(h->bucket[idx], cur, next) {
370 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[1b20da0]371 /*
372 * The next pointer had already been saved. f() may safely
[062d900]373 * delete cur (but not next!).
[892022a1]374 */
[062d900]375 if (!f(cur_link, arg))
[251d4dd]376 goto out;
[203a090]377 }
378 }
[251d4dd]379out:
[062d900]380 h->apply_ongoing = false;
[a35b458]381
[062d900]382 shrink_if_needed(h);
383 grow_if_needed(h);
384}
385
386/** Rounds up size to the nearest suitable table size. */
387static size_t round_up_size(size_t size)
388{
389 size_t rounded_size = HT_MIN_BUCKETS;
[a35b458]390
[062d900]391 while (rounded_size < size) {
392 rounded_size = 2 * rounded_size + 1;
393 }
[a35b458]394
[062d900]395 return rounded_size;
[203a090]396}
397
[d1582b50]398/** Allocates and initializes the desired number of buckets. True if successful. */
[062d900]399static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
400{
401 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
[a35b458]402
[062d900]403 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
404 if (!buckets)
405 return false;
[a35b458]406
[062d900]407 for (size_t i = 0; i < bucket_cnt; i++)
408 list_initialize(&buckets[i]);
409
410 *pbuckets = buckets;
411 return true;
412}
413
414/** Shrinks the table if the table is only sparely populated. */
415static inline void shrink_if_needed(hash_table_t *h)
416{
417 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
[1b20da0]418 /*
419 * Keep the bucket_cnt odd (possibly also prime).
[062d900]420 * Shrink from 2n + 1 to n. Integer division discards the +1.
421 */
422 size_t new_bucket_cnt = h->bucket_cnt / 2;
423 resize(h, new_bucket_cnt);
424 }
425}
426
427/** Grows the table if table load exceeds the maximum allowed. */
428static inline void grow_if_needed(hash_table_t *h)
429{
430 /* Grow the table if the average bucket load exceeds the maximum. */
431 if (h->full_item_cnt < h->item_cnt) {
432 /* Keep the bucket_cnt odd (possibly also prime). */
433 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
434 resize(h, new_bucket_cnt);
435 }
436}
437
438/** Allocates and rehashes items to a new table. Frees the old table. */
[1b20da0]439static void resize(hash_table_t *h, size_t new_bucket_cnt)
[062d900]440{
441 assert(h && h->bucket);
442 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
[a35b458]443
[062d900]444 /* We are traversing the table and resizing would mess up the buckets. */
445 if (h->apply_ongoing)
446 return;
[a35b458]447
[062d900]448 list_t *new_buckets;
449
450 /* Leave the table as is if we cannot resize. */
451 if (!alloc_table(new_bucket_cnt, &new_buckets))
452 return;
[a35b458]453
[062d900]454 if (0 < h->item_cnt) {
455 /* Rehash all the items to the new table. */
456 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
457 list_foreach_safe(h->bucket[old_idx], cur, next) {
458 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
459
460 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
461 list_remove(cur);
462 list_append(cur, &new_buckets[new_idx]);
463 }
464 }
465 }
[a35b458]466
[062d900]467 free(h->bucket);
468 h->bucket = new_buckets;
469 h->bucket_cnt = new_bucket_cnt;
470 h->full_item_cnt = h->max_load * h->bucket_cnt;
471}
472
[fadd381]473/** @}
[b2951e2]474 */
Note: See TracBrowser for help on using the repository browser.