source: mainline/uspace/lib/c/generic/adt/hash_table.c@ a35b458

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since a35b458 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

  • Property mode set to 100644
File size: 12.3 KB
Line 
1/*
2 * Copyright (c) 2008 Jakub Jermar
3 * Copyright (c) 2012 Adam Hraska
4 *
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/** @addtogroup libc
32 * @{
33 */
34/** @file
35 */
36
37/*
38 * This is an implementation of a generic resizable chained hash table.
39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
43 *
44 * This policy produces prime table sizes for the first five resizes
45 * and generally produces table sizes which are either prime or
46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
49 */
50
51#include <adt/hash_table.h>
52#include <adt/list.h>
53#include <assert.h>
54#include <stdlib.h>
55#include <str.h>
56
57/* Optimal initial bucket count. See comment above. */
58#define HT_MIN_BUCKETS 89
59/* The table is resized when the average load per bucket exceeds this number. */
60#define HT_MAX_LOAD 2
61
62
63static size_t round_up_size(size_t);
64static bool alloc_table(size_t, list_t **);
65static void clear_items(hash_table_t *);
66static void resize(hash_table_t *, size_t);
67static void grow_if_needed(hash_table_t *);
68static void shrink_if_needed(hash_table_t *);
69
70/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
71static void nop_remove_callback(ht_link_t *item)
72{
73 /* no-op */
74}
75
76
77/** Create chained hash table.
78 *
79 * @param h Hash table structure. Will be initialized by this call.
80 * @param init_size Initial desired number of hash table buckets. Pass zero
81 * if you want the default initial size.
82 * @param max_load The table is resized when the average load per bucket
83 * exceeds this number. Pass zero if you want the default.
84 * @param op Hash table operations structure. remove_callback()
85 * is optional and can be NULL if no action is to be taken
86 * upon removal. equal() is optional if and only if
87 * hash_table_insert_unique() will never be invoked.
88 * All other operations are mandatory.
89 *
90 * @return True on success
91 *
92 */
93bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
94 hash_table_ops_t *op)
95{
96 assert(h);
97 assert(op && op->hash && op->key_hash && op->key_equal);
98
99 /* Check for compulsory ops. */
100 if (!op || !op->hash || !op->key_hash || !op->key_equal)
101 return false;
102
103 h->bucket_cnt = round_up_size(init_size);
104
105 if (!alloc_table(h->bucket_cnt, &h->bucket))
106 return false;
107
108 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
109 h->item_cnt = 0;
110 h->op = op;
111 h->full_item_cnt = h->max_load * h->bucket_cnt;
112 h->apply_ongoing = false;
113
114 if (h->op->remove_callback == NULL) {
115 h->op->remove_callback = nop_remove_callback;
116 }
117
118 return true;
119}
120
121/** Destroy a hash table instance.
122 *
123 * @param h Hash table to be destroyed.
124 *
125 */
126void hash_table_destroy(hash_table_t *h)
127{
128 assert(h && h->bucket);
129 assert(!h->apply_ongoing);
130
131 clear_items(h);
132
133 free(h->bucket);
134
135 h->bucket = NULL;
136 h->bucket_cnt = 0;
137}
138
139/** Returns true if there are no items in the table. */
140bool hash_table_empty(hash_table_t *h)
141{
142 assert(h && h->bucket);
143 return h->item_cnt == 0;
144}
145
146/** Returns the number of items in the table. */
147size_t hash_table_size(hash_table_t *h)
148{
149 assert(h && h->bucket);
150 return h->item_cnt;
151}
152
153/** Remove all elements from the hash table
154 *
155 * @param h Hash table to be cleared
156 */
157void hash_table_clear(hash_table_t *h)
158{
159 assert(h && h->bucket);
160 assert(!h->apply_ongoing);
161
162 clear_items(h);
163
164 /* Shrink the table to its minimum size if possible. */
165 if (HT_MIN_BUCKETS < h->bucket_cnt) {
166 resize(h, HT_MIN_BUCKETS);
167 }
168}
169
170/** Unlinks and removes all items but does not resize. */
171static void clear_items(hash_table_t *h)
172{
173 if (h->item_cnt == 0)
174 return;
175
176 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
177 list_foreach_safe(h->bucket[idx], cur, next) {
178 assert(cur);
179 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
180
181 list_remove(cur);
182 h->op->remove_callback(cur_link);
183 }
184 }
185
186 h->item_cnt = 0;
187}
188
189/** Insert item into a hash table.
190 *
191 * @param h Hash table.
192 * @param item Item to be inserted into the hash table.
193 */
194void hash_table_insert(hash_table_t *h, ht_link_t *item)
195{
196 assert(item);
197 assert(h && h->bucket);
198 assert(!h->apply_ongoing);
199
200 size_t idx = h->op->hash(item) % h->bucket_cnt;
201
202 list_append(&item->link, &h->bucket[idx]);
203 ++h->item_cnt;
204 grow_if_needed(h);
205}
206
207
208/** Insert item into a hash table if not already present.
209 *
210 * @param h Hash table.
211 * @param item Item to be inserted into the hash table.
212 *
213 * @return False if such an item had already been inserted.
214 * @return True if the inserted item was the only item with such a lookup key.
215 */
216bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
217{
218 assert(item);
219 assert(h && h->bucket && h->bucket_cnt);
220 assert(h->op && h->op->hash && h->op->equal);
221 assert(!h->apply_ongoing);
222
223 size_t idx = h->op->hash(item) % h->bucket_cnt;
224
225 /* Check for duplicates. */
226 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
227 /*
228 * We could filter out items using their hashes first, but
229 * calling equal() might very well be just as fast.
230 */
231 if (h->op->equal(cur_link, item))
232 return false;
233 }
234
235 list_append(&item->link, &h->bucket[idx]);
236 ++h->item_cnt;
237 grow_if_needed(h);
238
239 return true;
240}
241
242/** Search hash table for an item matching keys.
243 *
244 * @param h Hash table.
245 * @param key Array of all keys needed to compute hash index.
246 *
247 * @return Matching item on success, NULL if there is no such item.
248 *
249 */
250ht_link_t *hash_table_find(const hash_table_t *h, void *key)
251{
252 assert(h && h->bucket);
253
254 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
255
256 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
257 /*
258 * Is this is the item we are looking for? We could have first
259 * checked if the hashes match but op->key_equal() may very well be
260 * just as fast as op->hash().
261 */
262 if (h->op->key_equal(key, cur_link)) {
263 return cur_link;
264 }
265 }
266
267 return NULL;
268}
269
270/** Find the next item equal to item. */
271ht_link_t *hash_table_find_next(const hash_table_t *h, ht_link_t *item)
272{
273 assert(item);
274 assert(h && h->bucket);
275
276 /* Traverse the circular list until we reach the starting item again. */
277 for (link_t *cur = item->link.next; cur != &item->link; cur = cur->next) {
278 assert(cur);
279 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
280 /*
281 * Is this is the item we are looking for? We could have first
282 * checked if the hashes match but op->equal() may very well be
283 * just as fast as op->hash().
284 */
285 if (h->op->equal(cur_link, item)) {
286 return cur_link;
287 }
288 }
289
290 return NULL;
291}
292
293/** Remove all matching items from hash table.
294 *
295 * For each removed item, h->remove_callback() is called.
296 *
297 * @param h Hash table.
298 * @param key Array of keys that will be compared against items of
299 * the hash table.
300 * @param keys Number of keys in the 'key' array.
301 *
302 * @return Returns the number of removed items.
303 */
304size_t hash_table_remove(hash_table_t *h, void *key)
305{
306 assert(h && h->bucket);
307 assert(!h->apply_ongoing);
308
309 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
310
311 size_t removed = 0;
312
313 list_foreach_safe(h->bucket[idx], cur, next) {
314 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
315
316 if (h->op->key_equal(key, cur_link)) {
317 ++removed;
318 list_remove(cur);
319 h->op->remove_callback(cur_link);
320 }
321 }
322
323 h->item_cnt -= removed;
324 shrink_if_needed(h);
325
326 return removed;
327}
328
329/** Removes an item already present in the table. The item must be in the table.*/
330void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
331{
332 assert(item);
333 assert(h && h->bucket);
334 assert(link_in_use(&item->link));
335
336 list_remove(&item->link);
337 --h->item_cnt;
338 h->op->remove_callback(item);
339 shrink_if_needed(h);
340}
341
342/** Apply function to all items in hash table.
343 *
344 * @param h Hash table.
345 * @param f Function to be applied. Return false if no more items
346 * should be visited. The functor may only delete the supplied
347 * item. It must not delete the successor of the item passed
348 * in the first argument.
349 * @param arg Argument to be passed to the function.
350 */
351void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
352{
353 assert(f);
354 assert(h && h->bucket);
355
356 if (h->item_cnt == 0)
357 return;
358
359 h->apply_ongoing = true;
360
361 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
362 list_foreach_safe(h->bucket[idx], cur, next) {
363 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
364 /*
365 * The next pointer had already been saved. f() may safely
366 * delete cur (but not next!).
367 */
368 if (!f(cur_link, arg))
369 goto out;
370 }
371 }
372out:
373 h->apply_ongoing = false;
374
375 shrink_if_needed(h);
376 grow_if_needed(h);
377}
378
379/** Rounds up size to the nearest suitable table size. */
380static size_t round_up_size(size_t size)
381{
382 size_t rounded_size = HT_MIN_BUCKETS;
383
384 while (rounded_size < size) {
385 rounded_size = 2 * rounded_size + 1;
386 }
387
388 return rounded_size;
389}
390
391/** Allocates and initializes the desired number of buckets. True if successful.*/
392static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
393{
394 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
395
396 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
397 if (!buckets)
398 return false;
399
400 for (size_t i = 0; i < bucket_cnt; i++)
401 list_initialize(&buckets[i]);
402
403 *pbuckets = buckets;
404 return true;
405}
406
407
408/** Shrinks the table if the table is only sparely populated. */
409static inline void shrink_if_needed(hash_table_t *h)
410{
411 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
412 /*
413 * Keep the bucket_cnt odd (possibly also prime).
414 * Shrink from 2n + 1 to n. Integer division discards the +1.
415 */
416 size_t new_bucket_cnt = h->bucket_cnt / 2;
417 resize(h, new_bucket_cnt);
418 }
419}
420
421/** Grows the table if table load exceeds the maximum allowed. */
422static inline void grow_if_needed(hash_table_t *h)
423{
424 /* Grow the table if the average bucket load exceeds the maximum. */
425 if (h->full_item_cnt < h->item_cnt) {
426 /* Keep the bucket_cnt odd (possibly also prime). */
427 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
428 resize(h, new_bucket_cnt);
429 }
430}
431
432/** Allocates and rehashes items to a new table. Frees the old table. */
433static void resize(hash_table_t *h, size_t new_bucket_cnt)
434{
435 assert(h && h->bucket);
436 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
437
438 /* We are traversing the table and resizing would mess up the buckets. */
439 if (h->apply_ongoing)
440 return;
441
442 list_t *new_buckets;
443
444 /* Leave the table as is if we cannot resize. */
445 if (!alloc_table(new_bucket_cnt, &new_buckets))
446 return;
447
448 if (0 < h->item_cnt) {
449 /* Rehash all the items to the new table. */
450 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
451 list_foreach_safe(h->bucket[old_idx], cur, next) {
452 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
453
454 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
455 list_remove(cur);
456 list_append(cur, &new_buckets[new_idx]);
457 }
458 }
459 }
460
461 free(h->bucket);
462 h->bucket = new_buckets;
463 h->bucket_cnt = new_bucket_cnt;
464 h->full_item_cnt = h->max_load * h->bucket_cnt;
465}
466
467
468/** @}
469 */
Note: See TracBrowser for help on using the repository browser.