source: mainline/uspace/lib/c/generic/adt/hash_table.c@ 0ca7286

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 0ca7286 was 0ca7286, checked in by Adam Hraska <adam.hraska+hos@…>, 13 years ago

Added resizing to user space (single-threaded) hash_table. Resizes in a way to mitigate effects of bad hash functions. Change of interface affected many files.

  • Property mode set to 100644
File size: 11.9 KB
RevLine 
[ee7736e]1/*
[739d00a]2 * Copyright (c) 2008 Jakub Jermar
[ee7736e]3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
[fadd381]29/** @addtogroup libc
[b2951e2]30 * @{
31 */
32/** @file
33 */
34
[ee7736e]35/*
[0ca7286]36 * This is an implementation of a generic resizable chained hash table.
37 *
38 * The table grows to 2*n+1 buckets each time, starting at n == 89,
39 * per Thomas Wang's recommendation:
40 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
41 *
42 * This policy produces prime table sizes for the first five resizes
43 * and generally produces table sizes which are either prime or
44 * have fairly large (prime/odd) divisors. Having a prime table size
45 * mitigates the use of suboptimal hash functions and distributes
46 * items over the whole table.
[ee7736e]47 */
48
[d9c8c81]49#include <adt/hash_table.h>
50#include <adt/list.h>
[ee7736e]51#include <unistd.h>
52#include <malloc.h>
53#include <assert.h>
[19f857a]54#include <str.h>
[ee7736e]55
[0ca7286]56/* Optimal initial bucket count. See comment above. */
57#define HT_MIN_BUCKETS 89
58/* The table is resized when the average load per bucket exceeds this number. */
59#define HT_MAX_LOAD 2
60
61
62static size_t round_up_size(size_t size);
63static bool alloc_table(size_t bucket_cnt, list_t **pbuckets);
64static void item_inserted(hash_table_t *h);
65static void item_removed(hash_table_t *h);
66static inline void remove_item(hash_table_t *h, link_t *item);
67static size_t remove_duplicates(hash_table_t *h, unsigned long key[]);
68static size_t remove_matching(hash_table_t *h, unsigned long key[], size_t key_cnt);
69
70/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
71static void nop_remove_callback(link_t *item)
72{
73 /* no-op */
74}
75
76
[ee7736e]77/** Create chained hash table.
78 *
[e1da7ec]79 * @param h Hash table structure. Will be initialized by this call.
[0ca7286]80 * @param init_size Initial desired number of hash table buckets. Pass zero
81 * if you want the default initial size.
[e1da7ec]82 * @param max_keys Maximal number of keys needed to identify an item.
[0ca7286]83 * @param op Hash table operations structure. remove_callback()
84 * is optional and can be NULL if no action is to be taken
85 * upon removal. equal() is optional if and only if
86 * hash_table_insert_unique() will never be invoked.
87 * All other operations are mandatory.
[e1da7ec]88 *
89 * @return True on success
90 *
[ee7736e]91 */
[0ca7286]92bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_keys,
93 hash_table_ops_t *op)
[ee7736e]94{
[4f34b6a]95 assert(h);
[0ca7286]96 assert(op && op->hash && op->key_hash && op->match);
[4f34b6a]97 assert(max_keys > 0);
[ee7736e]98
[0ca7286]99 h->bucket_cnt = round_up_size(init_size);
[ee7736e]100
[0ca7286]101 if (!alloc_table(h->bucket_cnt, &h->bucket))
102 return false;
[ee7736e]103
104 h->max_keys = max_keys;
[0ca7286]105 h->items = 0;
[ee7736e]106 h->op = op;
[e1da7ec]107
[0ca7286]108 if (h->op->remove_callback == 0)
109 h->op->remove_callback = nop_remove_callback;
110
[4f34b6a]111 return true;
[ee7736e]112}
113
[892022a1]114/** Remove all elements from the hash table
115 *
116 * @param h Hash table to be cleared
117 */
118void hash_table_clear(hash_table_t *h)
119{
[0ca7286]120 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
121 list_foreach_safe(h->bucket[idx], cur, next) {
[892022a1]122 list_remove(cur);
123 h->op->remove_callback(cur);
124 }
125 }
[0ca7286]126
127 h->items = 0;
128
129 /* Shrink the table to its minimum size if possible. */
130 if (HT_MIN_BUCKETS < h->bucket_cnt) {
131 list_t *new_buckets;
132 if (alloc_table(HT_MIN_BUCKETS, &new_buckets)) {
133 free(h->bucket);
134 h->bucket = new_buckets;
135 h->bucket_cnt = HT_MIN_BUCKETS;
136 }
137 }
[892022a1]138}
139
[739d00a]140/** Destroy a hash table instance.
[ee7736e]141 *
[e1da7ec]142 * @param h Hash table to be destroyed.
143 *
[739d00a]144 */
145void hash_table_destroy(hash_table_t *h)
146{
147 assert(h);
[0ca7286]148 assert(h->bucket);
[e1da7ec]149
[0ca7286]150 free(h->bucket);
151
152 h->bucket = 0;
153 h->bucket_cnt = 0;
[739d00a]154}
155
156/** Insert item into a hash table.
157 *
[e1da7ec]158 * @param h Hash table.
159 * @param key Array of all keys necessary to compute hash index.
160 * @param item Item to be inserted into the hash table.
[ee7736e]161 */
[0ca7286]162void hash_table_insert(hash_table_t *h, link_t *item)
[ee7736e]163{
[4f34b6a]164 assert(item);
[0ca7286]165 assert(h && h->bucket);
166 assert(h->op && h->op->hash);
167
168 size_t idx = h->op->hash(item) % h->bucket_cnt;
169
170 assert(idx < h->bucket_cnt);
171
172 list_append(item, &h->bucket[idx]);
173 item_inserted(h);
174}
175
176
177/** Insert item into a hash table if not already present.
178 *
179 * @param h Hash table.
180 * @param key Array of all keys necessary to compute hash index.
181 * @param item Item to be inserted into the hash table.
182 *
183 * @return False if such an item had already been inserted.
184 * @return True if the inserted item was the only item with such a lookup key.
185 */
186bool hash_table_insert_unique(hash_table_t *h, link_t *item)
187{
188 assert(item);
189 assert(h && h->bucket && h->bucket_cnt);
190 assert(h->op && h->op->hash && h->op->equal);
191
192 size_t item_hash = h->op->hash(item);
193 size_t idx = item_hash % h->bucket_cnt;
[e1da7ec]194
[0ca7286]195 assert(idx < h->bucket_cnt);
[ee7736e]196
[0ca7286]197 /* Check for duplicates. */
198 list_foreach(h->bucket[idx], cur) {
199 /*
200 * We could filter out items using their hashes first, but
201 * calling equal() might very well be just as fast.
202 */
203 if (h->op->equal(cur, item))
204 return false;
205 }
206
207 list_append(item, &h->bucket[idx]);
208 item_inserted(h);
209
210 return true;
[ee7736e]211}
212
213/** Search hash table for an item matching keys.
214 *
[e1da7ec]215 * @param h Hash table.
216 * @param key Array of all keys needed to compute hash index.
217 *
218 * @return Matching item on success, NULL if there is no such item.
[ee7736e]219 *
220 */
[4f34b6a]221link_t *hash_table_find(hash_table_t *h, unsigned long key[])
[ee7736e]222{
[0ca7286]223 assert(h && h->bucket);
224 assert(h->op && h->op->key_hash && h->op->match);
[e1da7ec]225
[0ca7286]226 size_t key_hash = h->op->key_hash(key);
227 size_t idx = key_hash % h->bucket_cnt;
228
229 assert(idx < h->bucket_cnt);
[ee7736e]230
[0ca7286]231 list_foreach(h->bucket[idx], cur) {
232 /*
233 * Is this is the item we are looking for? We could have first
234 * checked if the hashes match but op->match() may very well be
235 * just as fast as op->hash().
236 */
237 if (h->op->match(key, h->max_keys, cur)) {
[ee7736e]238 return cur;
239 }
240 }
241
242 return NULL;
243}
244
[0ca7286]245
246/** Apply function to all items in hash table.
247 *
248 * @param h Hash table.
249 * @param f Function to be applied. Return false if no more items
250 * should be visited. The functor must not delete the successor
251 * of the item passed in the first argument.
252 * @param arg Argument to be passed to the function.
253 *
254 */
255void hash_table_apply(hash_table_t *h, bool (*f)(link_t *, void *), void *arg)
256{
257 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
258 list_foreach_safe(h->bucket[idx], cur, next) {
259 /*
260 * The next pointer had already been saved. f() may safely
261 * delete cur (but not next!).
262 */
263 if (!f(cur, arg))
264 return;
265 }
266 }
267}
268
[ee7736e]269/** Remove all matching items from hash table.
270 *
271 * For each removed item, h->remove_callback() is called.
272 *
[e1da7ec]273 * @param h Hash table.
274 * @param key Array of keys that will be compared against items of
275 * the hash table.
276 * @param keys Number of keys in the 'key' array.
[0ca7286]277 *
278 * @return Returns the number of removed items.
[ee7736e]279 */
[0ca7286]280size_t hash_table_remove(hash_table_t *h, unsigned long key[], size_t key_cnt)
[ee7736e]281{
[0ca7286]282 assert(h && h->bucket);
283 assert(h && h->op && h->op->hash &&
[739d00a]284 h->op->remove_callback);
[0ca7286]285 assert(key_cnt <= h->max_keys);
[ee7736e]286
[0ca7286]287 /* All keys are known, remove from a specific bucket. */
288 if (key_cnt == h->max_keys) {
289 return remove_duplicates(h, key);
290 } else {
[ee7736e]291 /*
[0ca7286]292 * Fewer keys were passed.
293 * Any partially matching entries are to be removed.
294 */
295 return remove_matching(h, key, key_cnt);
296 }
297}
298
299/** Removes an item already present in the table. The item must be in the table.*/
300void hash_table_remove_item(hash_table_t *h, link_t *item)
301{
302 assert(item);
303 assert(h && h->bucket);
304
305 remove_item(h, item);
306}
307
308/** Unlink the item from a bucket, update statistics and resize if needed. */
309static inline void remove_item(hash_table_t *h, link_t *item)
310{
311 assert(item);
312
313 list_remove(item);
314 item_removed(h);
315 h->op->remove_callback(item);
316}
317
318/** Removes all items matching key in the bucket key hashes to. */
319static size_t remove_duplicates(hash_table_t *h, unsigned long key[])
320{
321 assert(h && h->bucket);
322 assert(h->op && h->op->key_hash && h->op->match);
323
324 size_t key_hash = h->op->key_hash(key);
325 size_t idx = key_hash % h->bucket_cnt;
326
327 assert(idx < h->bucket_cnt);
328
329 size_t removed = 0;
330
331 list_foreach_safe(h->bucket[idx], cur, next) {
332 if (h->op->match(key, h->max_keys, cur)) {
333 ++removed;
334 remove_item(h, cur);
[ee7736e]335 }
336 }
337
[0ca7286]338 return removed;
339}
340
341/** Removes all items in any bucket in the table that match the partial key. */
342static size_t remove_matching(hash_table_t *h, unsigned long key[],
343 size_t key_cnt)
344{
345 assert(h && h->bucket);
346 assert(key_cnt < h->max_keys);
347
348 size_t removed = 0;
[ee7736e]349 /*
350 * Fewer keys were passed.
351 * Any partially matching entries are to be removed.
352 */
[0ca7286]353 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
354 list_foreach_safe(h->bucket[idx], cur, next) {
355 if (h->op->match(key, key_cnt, cur)) {
356 ++removed;
357 remove_item(h, cur);
[ee7736e]358 }
359 }
360 }
[0ca7286]361
362 return removed;
363
[ee7736e]364}
[b2951e2]365
[0ca7286]366/** Rounds up size to the nearest suitable table size. */
367static size_t round_up_size(size_t size)
368{
369 size_t rounded_size = HT_MIN_BUCKETS;
370
371 while (rounded_size < size) {
372 rounded_size = 2 * rounded_size + 1;
373 }
374
375 return rounded_size;
376}
377
378/** Allocates and initializes the desired number of buckets. True if successful.*/
379static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
380{
381 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
382
383 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
384 if (!buckets)
385 return false;
386
387 for (size_t i = 0; i < bucket_cnt; i++)
388 list_initialize(&buckets[i]);
389
390 *pbuckets = buckets;
391 return true;
392}
393
394/** Allocates and rehashes items to a new table. Frees the old table. */
395static void resize(hash_table_t *h, size_t new_bucket_cnt)
396{
397 assert(h && h->bucket);
398
399 list_t *new_buckets;
400
401 /* Leave the table as is if we cannot resize. */
402 if (!alloc_table(new_bucket_cnt, &new_buckets))
403 return;
404
405 /* Rehash all the items to the new table. */
406 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
407 list_foreach_safe(h->bucket[old_idx], cur, next) {
408 size_t new_idx = h->op->hash(cur) % new_bucket_cnt;
409 list_remove(cur);
410 list_append(cur, &new_buckets[new_idx]);
[203a090]411 }
412 }
[0ca7286]413
414 free(h->bucket);
415 h->bucket = new_buckets;
416 h->bucket_cnt = new_bucket_cnt;
[203a090]417}
418
[0ca7286]419/** Shrinks the table if needed. */
420static void item_removed(hash_table_t *h)
421{
422 --h->items;
423
424 if (HT_MIN_BUCKETS < h->items && h->items <= HT_MAX_LOAD * h->bucket_cnt / 4) {
425 /*
426 * Keep the bucket_cnt odd (possibly also prime).
427 * Shrink from 2n + 1 to n. Integer division discards the +1.
428 */
429 size_t new_bucket_cnt = h->bucket_cnt / 2;
430 resize(h, new_bucket_cnt);
431 }
432}
433
434/** Grows the table if needed. */
435static void item_inserted(hash_table_t *h)
436{
437 ++h->items;
438
439 /* Grow the table if the average bucket load exceeds the maximum. */
440 if (HT_MAX_LOAD * h->bucket_cnt < h->items) {
441 /* Keep the bucket_cnt odd (possibly also prime). */
442 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
443 resize(h, new_bucket_cnt);
444 }
445}
446
447
[fadd381]448/** @}
[b2951e2]449 */
Note: See TracBrowser for help on using the repository browser.