source: mainline/uspace/lib/c/generic/adt/hash_table.c@ e948fde

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since e948fde was feeac0d, checked in by Jiri Svoboda <jiri@…>, 12 years ago

Simplify use of list_foreach.

  • Property mode set to 100644
File size: 12.5 KB
Line 
1/*
2 * Copyright (c) 2008 Jakub Jermar
3 * Copyright (c) 2012 Adam Hraska
4 *
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/** @addtogroup libc
32 * @{
33 */
34/** @file
35 */
36
37/*
38 * This is an implementation of a generic resizable chained hash table.
39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
43 *
44 * This policy produces prime table sizes for the first five resizes
45 * and generally produces table sizes which are either prime or
46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
49 */
50
51#include <adt/hash_table.h>
52#include <adt/list.h>
53#include <unistd.h>
54#include <malloc.h>
55#include <assert.h>
56#include <str.h>
57
58/* Optimal initial bucket count. See comment above. */
59#define HT_MIN_BUCKETS 89
60/* The table is resized when the average load per bucket exceeds this number. */
61#define HT_MAX_LOAD 2
62
63
64static size_t round_up_size(size_t);
65static bool alloc_table(size_t, list_t **);
66static void clear_items(hash_table_t *);
67static void resize(hash_table_t *, size_t);
68static void grow_if_needed(hash_table_t *);
69static void shrink_if_needed(hash_table_t *);
70
71/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
72static void nop_remove_callback(ht_link_t *item)
73{
74 /* no-op */
75}
76
77
78/** Create chained hash table.
79 *
80 * @param h Hash table structure. Will be initialized by this call.
81 * @param init_size Initial desired number of hash table buckets. Pass zero
82 * if you want the default initial size.
83 * @param max_load The table is resized when the average load per bucket
84 * exceeds this number. Pass zero if you want the default.
85 * @param op Hash table operations structure. remove_callback()
86 * is optional and can be NULL if no action is to be taken
87 * upon removal. equal() is optional if and only if
88 * hash_table_insert_unique() will never be invoked.
89 * All other operations are mandatory.
90 *
91 * @return True on success
92 *
93 */
94bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
95 hash_table_ops_t *op)
96{
97 assert(h);
98 assert(op && op->hash && op->key_hash && op->key_equal);
99
100 /* Check for compulsory ops. */
101 if (!op || !op->hash || !op->key_hash || !op->key_equal)
102 return false;
103
104 h->bucket_cnt = round_up_size(init_size);
105
106 if (!alloc_table(h->bucket_cnt, &h->bucket))
107 return false;
108
109 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
110 h->item_cnt = 0;
111 h->op = op;
112 h->full_item_cnt = h->max_load * h->bucket_cnt;
113 h->apply_ongoing = false;
114
115 if (h->op->remove_callback == NULL) {
116 h->op->remove_callback = nop_remove_callback;
117 }
118
119 return true;
120}
121
122/** Destroy a hash table instance.
123 *
124 * @param h Hash table to be destroyed.
125 *
126 */
127void hash_table_destroy(hash_table_t *h)
128{
129 assert(h && h->bucket);
130 assert(!h->apply_ongoing);
131
132 clear_items(h);
133
134 free(h->bucket);
135
136 h->bucket = 0;
137 h->bucket_cnt = 0;
138}
139
140/** Returns true if there are no items in the table. */
141bool hash_table_empty(hash_table_t *h)
142{
143 assert(h && h->bucket);
144 return h->item_cnt == 0;
145}
146
147/** Returns the number of items in the table. */
148size_t hash_table_size(hash_table_t *h)
149{
150 assert(h && h->bucket);
151 return h->item_cnt;
152}
153
154/** Remove all elements from the hash table
155 *
156 * @param h Hash table to be cleared
157 */
158void hash_table_clear(hash_table_t *h)
159{
160 assert(h && h->bucket);
161 assert(!h->apply_ongoing);
162
163 clear_items(h);
164
165 /* Shrink the table to its minimum size if possible. */
166 if (HT_MIN_BUCKETS < h->bucket_cnt) {
167 resize(h, HT_MIN_BUCKETS);
168 }
169}
170
171/** Unlinks and removes all items but does not resize. */
172static void clear_items(hash_table_t *h)
173{
174 if (h->item_cnt == 0)
175 return;
176
177 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
178 list_foreach_safe(h->bucket[idx], cur, next) {
179 assert(cur);
180 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
181
182 list_remove(cur);
183 h->op->remove_callback(cur_link);
184 }
185 }
186
187 h->item_cnt = 0;
188}
189
190/** Insert item into a hash table.
191 *
192 * @param h Hash table.
193 * @param key Array of all keys necessary to compute hash index.
194 * @param item Item to be inserted into the hash table.
195 */
196void hash_table_insert(hash_table_t *h, ht_link_t *item)
197{
198 assert(item);
199 assert(h && h->bucket);
200 assert(!h->apply_ongoing);
201
202 size_t idx = h->op->hash(item) % h->bucket_cnt;
203
204 list_append(&item->link, &h->bucket[idx]);
205 ++h->item_cnt;
206 grow_if_needed(h);
207}
208
209
210/** Insert item into a hash table if not already present.
211 *
212 * @param h Hash table.
213 * @param key Array of all keys necessary to compute hash index.
214 * @param item Item to be inserted into the hash table.
215 *
216 * @return False if such an item had already been inserted.
217 * @return True if the inserted item was the only item with such a lookup key.
218 */
219bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
220{
221 assert(item);
222 assert(h && h->bucket && h->bucket_cnt);
223 assert(h->op && h->op->hash && h->op->equal);
224 assert(!h->apply_ongoing);
225
226 size_t idx = h->op->hash(item) % h->bucket_cnt;
227
228 /* Check for duplicates. */
229 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
230 /*
231 * We could filter out items using their hashes first, but
232 * calling equal() might very well be just as fast.
233 */
234 if (h->op->equal(cur_link, item))
235 return false;
236 }
237
238 list_append(&item->link, &h->bucket[idx]);
239 ++h->item_cnt;
240 grow_if_needed(h);
241
242 return true;
243}
244
245/** Search hash table for an item matching keys.
246 *
247 * @param h Hash table.
248 * @param key Array of all keys needed to compute hash index.
249 *
250 * @return Matching item on success, NULL if there is no such item.
251 *
252 */
253ht_link_t *hash_table_find(const hash_table_t *h, void *key)
254{
255 assert(h && h->bucket);
256
257 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
258
259 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
260 /*
261 * Is this is the item we are looking for? We could have first
262 * checked if the hashes match but op->key_equal() may very well be
263 * just as fast as op->hash().
264 */
265 if (h->op->key_equal(key, cur_link)) {
266 return cur_link;
267 }
268 }
269
270 return NULL;
271}
272
273/** Find the next item equal to item. */
274ht_link_t *hash_table_find_next(const hash_table_t *h, ht_link_t *item)
275{
276 assert(item);
277 assert(h && h->bucket);
278
279 /* Traverse the circular list until we reach the starting item again. */
280 for (link_t *cur = item->link.next; cur != &item->link; cur = cur->next) {
281 assert(cur);
282 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
283 /*
284 * Is this is the item we are looking for? We could have first
285 * checked if the hashes match but op->equal() may very well be
286 * just as fast as op->hash().
287 */
288 if (h->op->equal(cur_link, item)) {
289 return cur_link;
290 }
291 }
292
293 return NULL;
294}
295
296/** Remove all matching items from hash table.
297 *
298 * For each removed item, h->remove_callback() is called.
299 *
300 * @param h Hash table.
301 * @param key Array of keys that will be compared against items of
302 * the hash table.
303 * @param keys Number of keys in the 'key' array.
304 *
305 * @return Returns the number of removed items.
306 */
307size_t hash_table_remove(hash_table_t *h, void *key)
308{
309 assert(h && h->bucket);
310 assert(!h->apply_ongoing);
311
312 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
313
314 size_t removed = 0;
315
316 list_foreach_safe(h->bucket[idx], cur, next) {
317 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
318
319 if (h->op->key_equal(key, cur_link)) {
320 ++removed;
321 list_remove(cur);
322 h->op->remove_callback(cur_link);
323 }
324 }
325
326 h->item_cnt -= removed;
327 shrink_if_needed(h);
328
329 return removed;
330}
331
332/** Removes an item already present in the table. The item must be in the table.*/
333void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
334{
335 assert(item);
336 assert(h && h->bucket);
337 assert(link_in_use(&item->link));
338
339 list_remove(&item->link);
340 --h->item_cnt;
341 h->op->remove_callback(item);
342 shrink_if_needed(h);
343}
344
345/** Apply function to all items in hash table.
346 *
347 * @param h Hash table.
348 * @param f Function to be applied. Return false if no more items
349 * should be visited. The functor may only delete the supplied
350 * item. It must not delete the successor of the item passed
351 * in the first argument.
352 * @param arg Argument to be passed to the function.
353 */
354void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
355{
356 assert(f);
357 assert(h && h->bucket);
358
359 if (h->item_cnt == 0)
360 return;
361
362 h->apply_ongoing = true;
363
364 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
365 list_foreach_safe(h->bucket[idx], cur, next) {
366 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
367 /*
368 * The next pointer had already been saved. f() may safely
369 * delete cur (but not next!).
370 */
371 if (!f(cur_link, arg))
372 return;
373 }
374 }
375
376 h->apply_ongoing = false;
377
378 shrink_if_needed(h);
379 grow_if_needed(h);
380}
381
382/** Rounds up size to the nearest suitable table size. */
383static size_t round_up_size(size_t size)
384{
385 size_t rounded_size = HT_MIN_BUCKETS;
386
387 while (rounded_size < size) {
388 rounded_size = 2 * rounded_size + 1;
389 }
390
391 return rounded_size;
392}
393
394/** Allocates and initializes the desired number of buckets. True if successful.*/
395static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
396{
397 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
398
399 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
400 if (!buckets)
401 return false;
402
403 for (size_t i = 0; i < bucket_cnt; i++)
404 list_initialize(&buckets[i]);
405
406 *pbuckets = buckets;
407 return true;
408}
409
410
411/** Shrinks the table if the table is only sparely populated. */
412static inline void shrink_if_needed(hash_table_t *h)
413{
414 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
415 /*
416 * Keep the bucket_cnt odd (possibly also prime).
417 * Shrink from 2n + 1 to n. Integer division discards the +1.
418 */
419 size_t new_bucket_cnt = h->bucket_cnt / 2;
420 resize(h, new_bucket_cnt);
421 }
422}
423
424/** Grows the table if table load exceeds the maximum allowed. */
425static inline void grow_if_needed(hash_table_t *h)
426{
427 /* Grow the table if the average bucket load exceeds the maximum. */
428 if (h->full_item_cnt < h->item_cnt) {
429 /* Keep the bucket_cnt odd (possibly also prime). */
430 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
431 resize(h, new_bucket_cnt);
432 }
433}
434
435/** Allocates and rehashes items to a new table. Frees the old table. */
436static void resize(hash_table_t *h, size_t new_bucket_cnt)
437{
438 assert(h && h->bucket);
439 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
440
441 /* We are traversing the table and resizing would mess up the buckets. */
442 if (h->apply_ongoing)
443 return;
444
445 list_t *new_buckets;
446
447 /* Leave the table as is if we cannot resize. */
448 if (!alloc_table(new_bucket_cnt, &new_buckets))
449 return;
450
451 if (0 < h->item_cnt) {
452 /* Rehash all the items to the new table. */
453 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
454 list_foreach_safe(h->bucket[old_idx], cur, next) {
455 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
456
457 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
458 list_remove(cur);
459 list_append(cur, &new_buckets[new_idx]);
460 }
461 }
462 }
463
464 free(h->bucket);
465 h->bucket = new_buckets;
466 h->bucket_cnt = new_bucket_cnt;
467 h->full_item_cnt = h->max_load * h->bucket_cnt;
468}
469
470
471/** @}
472 */
Note: See TracBrowser for help on using the repository browser.