source: mainline/uspace/lib/c/generic/adt/hash_table.c@ 514d561

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 514d561 was 30f1a25, checked in by Jakub Jermar <jakub@…>, 7 years ago

Make hash_table_find_next immune to livelocks

By giving hash_table_find_next the item returned from hash_table_find,
we provide it with a fixed reference with which the outer loop which
calls hash_table_find_next can terminate even if the respective bucket
contains more matching elements.

  • Property mode set to 100644
File size: 12.5 KB
RevLine 
[ee7736e]1/*
[739d00a]2 * Copyright (c) 2008 Jakub Jermar
[062d900]3 * Copyright (c) 2012 Adam Hraska
[1b20da0]4 *
[ee7736e]5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
[fadd381]31/** @addtogroup libc
[b2951e2]32 * @{
33 */
34/** @file
35 */
36
[ee7736e]37/*
[062d900]38 * This is an implementation of a generic resizable chained hash table.
[1b20da0]39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
[062d900]41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
[1b20da0]43 *
[062d900]44 * This policy produces prime table sizes for the first five resizes
[1b20da0]45 * and generally produces table sizes which are either prime or
[062d900]46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
[ee7736e]49 */
50
[d9c8c81]51#include <adt/hash_table.h>
52#include <adt/list.h>
[ee7736e]53#include <assert.h>
[38d150e]54#include <stdlib.h>
[19f857a]55#include <str.h>
[ee7736e]56
[062d900]57/* Optimal initial bucket count. See comment above. */
58#define HT_MIN_BUCKETS 89
59/* The table is resized when the average load per bucket exceeds this number. */
60#define HT_MAX_LOAD 2
61
62
[97b199b1]63static size_t round_up_size(size_t);
64static bool alloc_table(size_t, list_t **);
65static void clear_items(hash_table_t *);
66static void resize(hash_table_t *, size_t);
67static void grow_if_needed(hash_table_t *);
68static void shrink_if_needed(hash_table_t *);
[062d900]69
70/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
71static void nop_remove_callback(ht_link_t *item)
72{
73 /* no-op */
74}
75
76
[ee7736e]77/** Create chained hash table.
78 *
[e1da7ec]79 * @param h Hash table structure. Will be initialized by this call.
[062d900]80 * @param init_size Initial desired number of hash table buckets. Pass zero
[1b20da0]81 * if you want the default initial size.
[220210c8]82 * @param max_load The table is resized when the average load per bucket
83 * exceeds this number. Pass zero if you want the default.
[062d900]84 * @param op Hash table operations structure. remove_callback()
85 * is optional and can be NULL if no action is to be taken
86 * upon removal. equal() is optional if and only if
87 * hash_table_insert_unique() will never be invoked.
[1b20da0]88 * All other operations are mandatory.
[e1da7ec]89 *
90 * @return True on success
91 *
[ee7736e]92 */
[062d900]93bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
94 hash_table_ops_t *op)
[ee7736e]95{
[4f34b6a]96 assert(h);
[062d900]97 assert(op && op->hash && op->key_hash && op->key_equal);
[a35b458]98
[062d900]99 /* Check for compulsory ops. */
100 if (!op || !op->hash || !op->key_hash || !op->key_equal)
[4f34b6a]101 return false;
[a35b458]102
[062d900]103 h->bucket_cnt = round_up_size(init_size);
[a35b458]104
[062d900]105 if (!alloc_table(h->bucket_cnt, &h->bucket))
106 return false;
[a35b458]107
[062d900]108 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
109 h->item_cnt = 0;
[ee7736e]110 h->op = op;
[062d900]111 h->full_item_cnt = h->max_load * h->bucket_cnt;
112 h->apply_ongoing = false;
113
[97b199b1]114 if (h->op->remove_callback == NULL) {
[062d900]115 h->op->remove_callback = nop_remove_callback;
116 }
[a35b458]117
[4f34b6a]118 return true;
[ee7736e]119}
120
[062d900]121/** Destroy a hash table instance.
122 *
123 * @param h Hash table to be destroyed.
124 *
125 */
126void hash_table_destroy(hash_table_t *h)
127{
128 assert(h && h->bucket);
129 assert(!h->apply_ongoing);
[a35b458]130
[062d900]131 clear_items(h);
[a35b458]132
[062d900]133 free(h->bucket);
134
[205832b]135 h->bucket = NULL;
[062d900]136 h->bucket_cnt = 0;
137}
138
139/** Returns true if there are no items in the table. */
140bool hash_table_empty(hash_table_t *h)
141{
142 assert(h && h->bucket);
143 return h->item_cnt == 0;
144}
145
146/** Returns the number of items in the table. */
147size_t hash_table_size(hash_table_t *h)
148{
149 assert(h && h->bucket);
150 return h->item_cnt;
151}
152
[892022a1]153/** Remove all elements from the hash table
154 *
155 * @param h Hash table to be cleared
156 */
157void hash_table_clear(hash_table_t *h)
158{
[062d900]159 assert(h && h->bucket);
160 assert(!h->apply_ongoing);
[a35b458]161
[062d900]162 clear_items(h);
[a35b458]163
[062d900]164 /* Shrink the table to its minimum size if possible. */
165 if (HT_MIN_BUCKETS < h->bucket_cnt) {
166 resize(h, HT_MIN_BUCKETS);
167 }
168}
169
170/** Unlinks and removes all items but does not resize. */
171static void clear_items(hash_table_t *h)
172{
173 if (h->item_cnt == 0)
174 return;
[a35b458]175
[062d900]176 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
177 list_foreach_safe(h->bucket[idx], cur, next) {
178 assert(cur);
179 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[a35b458]180
[892022a1]181 list_remove(cur);
[062d900]182 h->op->remove_callback(cur_link);
[892022a1]183 }
184 }
[a35b458]185
[062d900]186 h->item_cnt = 0;
[892022a1]187}
188
[062d900]189/** Insert item into a hash table.
[e1da7ec]190 *
[062d900]191 * @param h Hash table.
192 * @param item Item to be inserted into the hash table.
[739d00a]193 */
[062d900]194void hash_table_insert(hash_table_t *h, ht_link_t *item)
[739d00a]195{
[062d900]196 assert(item);
197 assert(h && h->bucket);
198 assert(!h->apply_ongoing);
[a35b458]199
[062d900]200 size_t idx = h->op->hash(item) % h->bucket_cnt;
[a35b458]201
[062d900]202 list_append(&item->link, &h->bucket[idx]);
203 ++h->item_cnt;
204 grow_if_needed(h);
[739d00a]205}
206
[062d900]207
208/** Insert item into a hash table if not already present.
[739d00a]209 *
[e1da7ec]210 * @param h Hash table.
211 * @param item Item to be inserted into the hash table.
[1b20da0]212 *
213 * @return False if such an item had already been inserted.
[062d900]214 * @return True if the inserted item was the only item with such a lookup key.
[ee7736e]215 */
[062d900]216bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
[ee7736e]217{
[4f34b6a]218 assert(item);
[062d900]219 assert(h && h->bucket && h->bucket_cnt);
220 assert(h->op && h->op->hash && h->op->equal);
221 assert(!h->apply_ongoing);
[a35b458]222
[062d900]223 size_t idx = h->op->hash(item) % h->bucket_cnt;
[a35b458]224
[062d900]225 /* Check for duplicates. */
[feeac0d]226 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
[1b20da0]227 /*
228 * We could filter out items using their hashes first, but
[062d900]229 * calling equal() might very well be just as fast.
230 */
231 if (h->op->equal(cur_link, item))
232 return false;
233 }
[a35b458]234
[062d900]235 list_append(&item->link, &h->bucket[idx]);
236 ++h->item_cnt;
237 grow_if_needed(h);
[a35b458]238
[062d900]239 return true;
[ee7736e]240}
241
242/** Search hash table for an item matching keys.
243 *
[e1da7ec]244 * @param h Hash table.
245 * @param key Array of all keys needed to compute hash index.
246 *
247 * @return Matching item on success, NULL if there is no such item.
[ee7736e]248 *
249 */
[062d900]250ht_link_t *hash_table_find(const hash_table_t *h, void *key)
[ee7736e]251{
[062d900]252 assert(h && h->bucket);
[a35b458]253
[062d900]254 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
255
[feeac0d]256 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
[1b20da0]257 /*
258 * Is this is the item we are looking for? We could have first
259 * checked if the hashes match but op->key_equal() may very well be
[062d900]260 * just as fast as op->hash().
261 */
262 if (h->op->key_equal(key, cur_link)) {
263 return cur_link;
[ee7736e]264 }
265 }
[a35b458]266
[ee7736e]267 return NULL;
268}
269
[062d900]270/** Find the next item equal to item. */
[30f1a25]271ht_link_t *
272hash_table_find_next(const hash_table_t *h, ht_link_t *first, ht_link_t *item)
[062d900]273{
274 assert(item);
275 assert(h && h->bucket);
276
[963037b0]277 size_t idx = h->op->hash(item) % h->bucket_cnt;
278
[062d900]279 /* Traverse the circular list until we reach the starting item again. */
[30f1a25]280 for (link_t *cur = item->link.next; cur != &first->link;
281 cur = cur->next) {
[062d900]282 assert(cur);
[963037b0]283
284 if (cur == &h->bucket[idx].head)
285 continue;
286
[062d900]287 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[1b20da0]288 /*
289 * Is this is the item we are looking for? We could have first
290 * checked if the hashes match but op->equal() may very well be
[062d900]291 * just as fast as op->hash().
292 */
293 if (h->op->equal(cur_link, item)) {
294 return cur_link;
295 }
296 }
297
298 return NULL;
299}
300
[ee7736e]301/** Remove all matching items from hash table.
302 *
303 * For each removed item, h->remove_callback() is called.
304 *
[e1da7ec]305 * @param h Hash table.
306 * @param key Array of keys that will be compared against items of
307 * the hash table.
308 * @param keys Number of keys in the 'key' array.
[1b20da0]309 *
[062d900]310 * @return Returns the number of removed items.
[ee7736e]311 */
[062d900]312size_t hash_table_remove(hash_table_t *h, void *key)
[ee7736e]313{
[062d900]314 assert(h && h->bucket);
315 assert(!h->apply_ongoing);
[a35b458]316
[062d900]317 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
318
319 size_t removed = 0;
[a35b458]320
[062d900]321 list_foreach_safe(h->bucket[idx], cur, next) {
322 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[a35b458]323
[062d900]324 if (h->op->key_equal(key, cur_link)) {
325 ++removed;
[ee7736e]326 list_remove(cur);
[062d900]327 h->op->remove_callback(cur_link);
[ee7736e]328 }
329 }
[062d900]330
331 h->item_cnt -= removed;
332 shrink_if_needed(h);
[a35b458]333
[062d900]334 return removed;
335}
336
337/** Removes an item already present in the table. The item must be in the table.*/
338void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
339{
340 assert(item);
341 assert(h && h->bucket);
342 assert(link_in_use(&item->link));
343
344 list_remove(&item->link);
345 --h->item_cnt;
346 h->op->remove_callback(item);
347 shrink_if_needed(h);
[ee7736e]348}
[b2951e2]349
[1ab4aca]350/** Apply function to all items in hash table.
[203a090]351 *
[e1da7ec]352 * @param h Hash table.
[1b20da0]353 * @param f Function to be applied. Return false if no more items
[062d900]354 * should be visited. The functor may only delete the supplied
[1b20da0]355 * item. It must not delete the successor of the item passed
[062d900]356 * in the first argument.
[e1da7ec]357 * @param arg Argument to be passed to the function.
[203a090]358 */
[062d900]359void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
[1b20da0]360{
[062d900]361 assert(f);
362 assert(h && h->bucket);
[a35b458]363
[062d900]364 if (h->item_cnt == 0)
365 return;
[a35b458]366
[062d900]367 h->apply_ongoing = true;
[a35b458]368
[062d900]369 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
370 list_foreach_safe(h->bucket[idx], cur, next) {
371 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[1b20da0]372 /*
373 * The next pointer had already been saved. f() may safely
[062d900]374 * delete cur (but not next!).
[892022a1]375 */
[062d900]376 if (!f(cur_link, arg))
[251d4dd]377 goto out;
[203a090]378 }
379 }
[251d4dd]380out:
[062d900]381 h->apply_ongoing = false;
[a35b458]382
[062d900]383 shrink_if_needed(h);
384 grow_if_needed(h);
385}
386
387/** Rounds up size to the nearest suitable table size. */
388static size_t round_up_size(size_t size)
389{
390 size_t rounded_size = HT_MIN_BUCKETS;
[a35b458]391
[062d900]392 while (rounded_size < size) {
393 rounded_size = 2 * rounded_size + 1;
394 }
[a35b458]395
[062d900]396 return rounded_size;
[203a090]397}
398
[062d900]399/** Allocates and initializes the desired number of buckets. True if successful.*/
400static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
401{
402 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
[a35b458]403
[062d900]404 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
405 if (!buckets)
406 return false;
[a35b458]407
[062d900]408 for (size_t i = 0; i < bucket_cnt; i++)
409 list_initialize(&buckets[i]);
410
411 *pbuckets = buckets;
412 return true;
413}
414
415
416/** Shrinks the table if the table is only sparely populated. */
417static inline void shrink_if_needed(hash_table_t *h)
418{
419 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
[1b20da0]420 /*
421 * Keep the bucket_cnt odd (possibly also prime).
[062d900]422 * Shrink from 2n + 1 to n. Integer division discards the +1.
423 */
424 size_t new_bucket_cnt = h->bucket_cnt / 2;
425 resize(h, new_bucket_cnt);
426 }
427}
428
429/** Grows the table if table load exceeds the maximum allowed. */
430static inline void grow_if_needed(hash_table_t *h)
431{
432 /* Grow the table if the average bucket load exceeds the maximum. */
433 if (h->full_item_cnt < h->item_cnt) {
434 /* Keep the bucket_cnt odd (possibly also prime). */
435 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
436 resize(h, new_bucket_cnt);
437 }
438}
439
440/** Allocates and rehashes items to a new table. Frees the old table. */
[1b20da0]441static void resize(hash_table_t *h, size_t new_bucket_cnt)
[062d900]442{
443 assert(h && h->bucket);
444 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
[a35b458]445
[062d900]446 /* We are traversing the table and resizing would mess up the buckets. */
447 if (h->apply_ongoing)
448 return;
[a35b458]449
[062d900]450 list_t *new_buckets;
451
452 /* Leave the table as is if we cannot resize. */
453 if (!alloc_table(new_bucket_cnt, &new_buckets))
454 return;
[a35b458]455
[062d900]456 if (0 < h->item_cnt) {
457 /* Rehash all the items to the new table. */
458 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
459 list_foreach_safe(h->bucket[old_idx], cur, next) {
460 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
461
462 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
463 list_remove(cur);
464 list_append(cur, &new_buckets[new_idx]);
465 }
466 }
467 }
[a35b458]468
[062d900]469 free(h->bucket);
470 h->bucket = new_buckets;
471 h->bucket_cnt = new_bucket_cnt;
472 h->full_item_cnt = h->max_load * h->bucket_cnt;
473}
474
475
[fadd381]476/** @}
[b2951e2]477 */
Note: See TracBrowser for help on using the repository browser.