source: mainline/uspace/lib/c/generic/adt/hash_table.c@ 1e4a937

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 1e4a937 was 5e801dc, checked in by GitHub <noreply@…>, 6 years ago

Indicate and enforce constness of hash table key in certain functions (#158)

The assumption here is that modifying key in the hash/equal functions in something completely unexpected, and not something you would ever want to do intentionally, so it makes sense to disallow it entirely to get that extra level of checking.

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/*
2 * Copyright (c) 2008 Jakub Jermar
3 * Copyright (c) 2012 Adam Hraska
4 *
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/** @addtogroup libc
32 * @{
33 */
34/** @file
35 */
36
37/*
38 * This is an implementation of a generic resizable chained hash table.
39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
43 *
44 * This policy produces prime table sizes for the first five resizes
45 * and generally produces table sizes which are either prime or
46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
49 */
50
51#include <adt/hash_table.h>
52#include <adt/list.h>
53#include <assert.h>
54#include <stdlib.h>
55#include <str.h>
56
57/* Optimal initial bucket count. See comment above. */
58#define HT_MIN_BUCKETS 89
59/* The table is resized when the average load per bucket exceeds this number. */
60#define HT_MAX_LOAD 2
61
62static size_t round_up_size(size_t);
63static bool alloc_table(size_t, list_t **);
64static void clear_items(hash_table_t *);
65static void resize(hash_table_t *, size_t);
66static void grow_if_needed(hash_table_t *);
67static void shrink_if_needed(hash_table_t *);
68
69/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
70static void nop_remove_callback(ht_link_t *item)
71{
72 /* no-op */
73}
74
75/** Create chained hash table.
76 *
77 * @param h Hash table structure. Will be initialized by this call.
78 * @param init_size Initial desired number of hash table buckets. Pass zero
79 * if you want the default initial size.
80 * @param max_load The table is resized when the average load per bucket
81 * exceeds this number. Pass zero if you want the default.
82 * @param op Hash table operations structure. remove_callback()
83 * is optional and can be NULL if no action is to be taken
84 * upon removal. equal() is optional if and only if
85 * hash_table_insert_unique() will never be invoked.
86 * All other operations are mandatory.
87 *
88 * @return True on success
89 *
90 */
91bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
92 hash_table_ops_t *op)
93{
94 assert(h);
95 assert(op && op->hash && op->key_hash && op->key_equal);
96
97 /* Check for compulsory ops. */
98 if (!op || !op->hash || !op->key_hash || !op->key_equal)
99 return false;
100
101 h->bucket_cnt = round_up_size(init_size);
102
103 if (!alloc_table(h->bucket_cnt, &h->bucket))
104 return false;
105
106 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
107 h->item_cnt = 0;
108 h->op = op;
109 h->full_item_cnt = h->max_load * h->bucket_cnt;
110 h->apply_ongoing = false;
111
112 if (h->op->remove_callback == NULL) {
113 h->op->remove_callback = nop_remove_callback;
114 }
115
116 return true;
117}
118
119/** Destroy a hash table instance.
120 *
121 * @param h Hash table to be destroyed.
122 *
123 */
124void hash_table_destroy(hash_table_t *h)
125{
126 assert(h && h->bucket);
127 assert(!h->apply_ongoing);
128
129 clear_items(h);
130
131 free(h->bucket);
132
133 h->bucket = NULL;
134 h->bucket_cnt = 0;
135}
136
137/** Returns true if there are no items in the table. */
138bool hash_table_empty(hash_table_t *h)
139{
140 assert(h && h->bucket);
141 return h->item_cnt == 0;
142}
143
144/** Returns the number of items in the table. */
145size_t hash_table_size(hash_table_t *h)
146{
147 assert(h && h->bucket);
148 return h->item_cnt;
149}
150
151/** Remove all elements from the hash table
152 *
153 * @param h Hash table to be cleared
154 */
155void hash_table_clear(hash_table_t *h)
156{
157 assert(h && h->bucket);
158 assert(!h->apply_ongoing);
159
160 clear_items(h);
161
162 /* Shrink the table to its minimum size if possible. */
163 if (HT_MIN_BUCKETS < h->bucket_cnt) {
164 resize(h, HT_MIN_BUCKETS);
165 }
166}
167
168/** Unlinks and removes all items but does not resize. */
169static void clear_items(hash_table_t *h)
170{
171 if (h->item_cnt == 0)
172 return;
173
174 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
175 list_foreach_safe(h->bucket[idx], cur, next) {
176 assert(cur);
177 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
178
179 list_remove(cur);
180 h->op->remove_callback(cur_link);
181 }
182 }
183
184 h->item_cnt = 0;
185}
186
187/** Insert item into a hash table.
188 *
189 * @param h Hash table.
190 * @param item Item to be inserted into the hash table.
191 */
192void hash_table_insert(hash_table_t *h, ht_link_t *item)
193{
194 assert(item);
195 assert(h && h->bucket);
196 assert(!h->apply_ongoing);
197
198 size_t idx = h->op->hash(item) % h->bucket_cnt;
199
200 list_append(&item->link, &h->bucket[idx]);
201 ++h->item_cnt;
202 grow_if_needed(h);
203}
204
205/** Insert item into a hash table if not already present.
206 *
207 * @param h Hash table.
208 * @param item Item to be inserted into the hash table.
209 *
210 * @return False if such an item had already been inserted.
211 * @return True if the inserted item was the only item with such a lookup key.
212 */
213bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
214{
215 assert(item);
216 assert(h && h->bucket && h->bucket_cnt);
217 assert(h->op && h->op->hash && h->op->equal);
218 assert(!h->apply_ongoing);
219
220 size_t idx = h->op->hash(item) % h->bucket_cnt;
221
222 /* Check for duplicates. */
223 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
224 /*
225 * We could filter out items using their hashes first, but
226 * calling equal() might very well be just as fast.
227 */
228 if (h->op->equal(cur_link, item))
229 return false;
230 }
231
232 list_append(&item->link, &h->bucket[idx]);
233 ++h->item_cnt;
234 grow_if_needed(h);
235
236 return true;
237}
238
239/** Search hash table for an item matching keys.
240 *
241 * @param h Hash table.
242 * @param key Array of all keys needed to compute hash index.
243 *
244 * @return Matching item on success, NULL if there is no such item.
245 *
246 */
247ht_link_t *hash_table_find(const hash_table_t *h, const void *key)
248{
249 assert(h && h->bucket);
250
251 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
252
253 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
254 /*
255 * Is this is the item we are looking for? We could have first
256 * checked if the hashes match but op->key_equal() may very well be
257 * just as fast as op->hash().
258 */
259 if (h->op->key_equal(key, cur_link)) {
260 return cur_link;
261 }
262 }
263
264 return NULL;
265}
266
267/** Find the next item equal to item. */
268ht_link_t *
269hash_table_find_next(const hash_table_t *h, ht_link_t *first, ht_link_t *item)
270{
271 assert(item);
272 assert(h && h->bucket);
273
274 size_t idx = h->op->hash(item) % h->bucket_cnt;
275
276 /* Traverse the circular list until we reach the starting item again. */
277 for (link_t *cur = item->link.next; cur != &first->link;
278 cur = cur->next) {
279 assert(cur);
280
281 if (cur == &h->bucket[idx].head)
282 continue;
283
284 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
285 /*
286 * Is this is the item we are looking for? We could have first
287 * checked if the hashes match but op->equal() may very well be
288 * just as fast as op->hash().
289 */
290 if (h->op->equal(cur_link, item)) {
291 return cur_link;
292 }
293 }
294
295 return NULL;
296}
297
298/** Remove all matching items from hash table.
299 *
300 * For each removed item, h->remove_callback() is called.
301 *
302 * @param h Hash table.
303 * @param key Array of keys that will be compared against items of
304 * the hash table.
305 *
306 * @return Returns the number of removed items.
307 */
308size_t hash_table_remove(hash_table_t *h, const void *key)
309{
310 assert(h && h->bucket);
311 assert(!h->apply_ongoing);
312
313 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
314
315 size_t removed = 0;
316
317 list_foreach_safe(h->bucket[idx], cur, next) {
318 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
319
320 if (h->op->key_equal(key, cur_link)) {
321 ++removed;
322 list_remove(cur);
323 h->op->remove_callback(cur_link);
324 }
325 }
326
327 h->item_cnt -= removed;
328 shrink_if_needed(h);
329
330 return removed;
331}
332
333/** Removes an item already present in the table. The item must be in the table.*/
334void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
335{
336 assert(item);
337 assert(h && h->bucket);
338 assert(link_in_use(&item->link));
339
340 list_remove(&item->link);
341 --h->item_cnt;
342 h->op->remove_callback(item);
343 shrink_if_needed(h);
344}
345
346/** Apply function to all items in hash table.
347 *
348 * @param h Hash table.
349 * @param f Function to be applied. Return false if no more items
350 * should be visited. The functor may only delete the supplied
351 * item. It must not delete the successor of the item passed
352 * in the first argument.
353 * @param arg Argument to be passed to the function.
354 */
355void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
356{
357 assert(f);
358 assert(h && h->bucket);
359
360 if (h->item_cnt == 0)
361 return;
362
363 h->apply_ongoing = true;
364
365 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
366 list_foreach_safe(h->bucket[idx], cur, next) {
367 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
368 /*
369 * The next pointer had already been saved. f() may safely
370 * delete cur (but not next!).
371 */
372 if (!f(cur_link, arg))
373 goto out;
374 }
375 }
376out:
377 h->apply_ongoing = false;
378
379 shrink_if_needed(h);
380 grow_if_needed(h);
381}
382
383/** Rounds up size to the nearest suitable table size. */
384static size_t round_up_size(size_t size)
385{
386 size_t rounded_size = HT_MIN_BUCKETS;
387
388 while (rounded_size < size) {
389 rounded_size = 2 * rounded_size + 1;
390 }
391
392 return rounded_size;
393}
394
395/** Allocates and initializes the desired number of buckets. True if successful.*/
396static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
397{
398 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
399
400 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
401 if (!buckets)
402 return false;
403
404 for (size_t i = 0; i < bucket_cnt; i++)
405 list_initialize(&buckets[i]);
406
407 *pbuckets = buckets;
408 return true;
409}
410
411/** Shrinks the table if the table is only sparely populated. */
412static inline void shrink_if_needed(hash_table_t *h)
413{
414 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
415 /*
416 * Keep the bucket_cnt odd (possibly also prime).
417 * Shrink from 2n + 1 to n. Integer division discards the +1.
418 */
419 size_t new_bucket_cnt = h->bucket_cnt / 2;
420 resize(h, new_bucket_cnt);
421 }
422}
423
424/** Grows the table if table load exceeds the maximum allowed. */
425static inline void grow_if_needed(hash_table_t *h)
426{
427 /* Grow the table if the average bucket load exceeds the maximum. */
428 if (h->full_item_cnt < h->item_cnt) {
429 /* Keep the bucket_cnt odd (possibly also prime). */
430 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
431 resize(h, new_bucket_cnt);
432 }
433}
434
435/** Allocates and rehashes items to a new table. Frees the old table. */
436static void resize(hash_table_t *h, size_t new_bucket_cnt)
437{
438 assert(h && h->bucket);
439 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
440
441 /* We are traversing the table and resizing would mess up the buckets. */
442 if (h->apply_ongoing)
443 return;
444
445 list_t *new_buckets;
446
447 /* Leave the table as is if we cannot resize. */
448 if (!alloc_table(new_bucket_cnt, &new_buckets))
449 return;
450
451 if (0 < h->item_cnt) {
452 /* Rehash all the items to the new table. */
453 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
454 list_foreach_safe(h->bucket[old_idx], cur, next) {
455 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
456
457 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
458 list_remove(cur);
459 list_append(cur, &new_buckets[new_idx]);
460 }
461 }
462 }
463
464 free(h->bucket);
465 h->bucket = new_buckets;
466 h->bucket_cnt = new_bucket_cnt;
467 h->full_item_cnt = h->max_load * h->bucket_cnt;
468}
469
470/** @}
471 */
Note: See TracBrowser for help on using the repository browser.