source: mainline/common/adt/hash_table.c@ e77c3ed

Last change on this file since e77c3ed was 0db0df2, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 months ago

Hash table improvements

Implement hash_table_foreach macro, analogous to list_foreach.

Remove superfluous argument to hash_table_find_next().
(If the user needs to recheck the part of the list already
checked by hash_table_find(), they can just rerun that function.)

Add hash argument to hash_table_ops_t::key_equal.
The big change here is that users with big keys can store the hash
value alongside key in their entries, and for the low low cost of
sizeof(size_t) bytes eliminate a bunch of expensive key comparisons.

Also added a hash function for strings and arbitrary data.
Found this one by asking ChatGPT, because the latency of accesses
to my book collection is currently a couple of hours.

+ Some drive-by unused #include removal.

  • Property mode set to 100644
File size: 12.1 KB
RevLine 
[ee7736e]1/*
[739d00a]2 * Copyright (c) 2008 Jakub Jermar
[062d900]3 * Copyright (c) 2012 Adam Hraska
[1b20da0]4 *
[ee7736e]5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
[fadd381]31/** @addtogroup libc
[b2951e2]32 * @{
33 */
34/** @file
35 */
36
[ee7736e]37/*
[062d900]38 * This is an implementation of a generic resizable chained hash table.
[1b20da0]39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
[062d900]41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
[1b20da0]43 *
[062d900]44 * This policy produces prime table sizes for the first five resizes
[1b20da0]45 * and generally produces table sizes which are either prime or
[062d900]46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
[ee7736e]49 */
50
[d9c8c81]51#include <adt/hash_table.h>
52#include <adt/list.h>
[ee7736e]53#include <assert.h>
[38d150e]54#include <stdlib.h>
[19f857a]55#include <str.h>
[ee7736e]56
[062d900]57/* Optimal initial bucket count. See comment above. */
58#define HT_MIN_BUCKETS 89
59/* The table is resized when the average load per bucket exceeds this number. */
60#define HT_MAX_LOAD 2
61
[97b199b1]62static size_t round_up_size(size_t);
63static bool alloc_table(size_t, list_t **);
64static void clear_items(hash_table_t *);
65static void resize(hash_table_t *, size_t);
66static void grow_if_needed(hash_table_t *);
67static void shrink_if_needed(hash_table_t *);
[062d900]68
69/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
70static void nop_remove_callback(ht_link_t *item)
71{
72 /* no-op */
73}
74
[ee7736e]75/** Create chained hash table.
76 *
[e1da7ec]77 * @param h Hash table structure. Will be initialized by this call.
[062d900]78 * @param init_size Initial desired number of hash table buckets. Pass zero
[1b20da0]79 * if you want the default initial size.
[220210c8]80 * @param max_load The table is resized when the average load per bucket
81 * exceeds this number. Pass zero if you want the default.
[062d900]82 * @param op Hash table operations structure. remove_callback()
83 * is optional and can be NULL if no action is to be taken
84 * upon removal. equal() is optional if and only if
85 * hash_table_insert_unique() will never be invoked.
[1b20da0]86 * All other operations are mandatory.
[e1da7ec]87 *
88 * @return True on success
89 *
[ee7736e]90 */
[062d900]91bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
[61eb2ce2]92 const hash_table_ops_t *op)
[ee7736e]93{
[4f34b6a]94 assert(h);
[062d900]95 assert(op && op->hash && op->key_hash && op->key_equal);
[a35b458]96
[062d900]97 /* Check for compulsory ops. */
98 if (!op || !op->hash || !op->key_hash || !op->key_equal)
[4f34b6a]99 return false;
[a35b458]100
[062d900]101 h->bucket_cnt = round_up_size(init_size);
[a35b458]102
[062d900]103 if (!alloc_table(h->bucket_cnt, &h->bucket))
104 return false;
[a35b458]105
[062d900]106 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
107 h->item_cnt = 0;
[ee7736e]108 h->op = op;
[062d900]109 h->full_item_cnt = h->max_load * h->bucket_cnt;
110 h->apply_ongoing = false;
111
[4f34b6a]112 return true;
[ee7736e]113}
114
[062d900]115/** Destroy a hash table instance.
116 *
117 * @param h Hash table to be destroyed.
118 *
119 */
120void hash_table_destroy(hash_table_t *h)
121{
122 assert(h && h->bucket);
123 assert(!h->apply_ongoing);
[a35b458]124
[062d900]125 clear_items(h);
[a35b458]126
[062d900]127 free(h->bucket);
128
[205832b]129 h->bucket = NULL;
[062d900]130 h->bucket_cnt = 0;
131}
132
133/** Returns true if there are no items in the table. */
134bool hash_table_empty(hash_table_t *h)
135{
136 assert(h && h->bucket);
137 return h->item_cnt == 0;
138}
139
140/** Returns the number of items in the table. */
141size_t hash_table_size(hash_table_t *h)
142{
143 assert(h && h->bucket);
144 return h->item_cnt;
145}
146
[892022a1]147/** Remove all elements from the hash table
148 *
149 * @param h Hash table to be cleared
150 */
151void hash_table_clear(hash_table_t *h)
152{
[062d900]153 assert(h && h->bucket);
154 assert(!h->apply_ongoing);
[a35b458]155
[062d900]156 clear_items(h);
[a35b458]157
[062d900]158 /* Shrink the table to its minimum size if possible. */
159 if (HT_MIN_BUCKETS < h->bucket_cnt) {
160 resize(h, HT_MIN_BUCKETS);
161 }
162}
163
164/** Unlinks and removes all items but does not resize. */
165static void clear_items(hash_table_t *h)
166{
167 if (h->item_cnt == 0)
168 return;
[a35b458]169
[61eb2ce2]170 void (*remove_cb)(ht_link_t *) = h->op->remove_callback ? h->op->remove_callback : nop_remove_callback;
171
[062d900]172 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
173 list_foreach_safe(h->bucket[idx], cur, next) {
174 assert(cur);
175 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[a35b458]176
[892022a1]177 list_remove(cur);
[61eb2ce2]178 remove_cb(cur_link);
[892022a1]179 }
180 }
[a35b458]181
[062d900]182 h->item_cnt = 0;
[892022a1]183}
184
[062d900]185/** Insert item into a hash table.
[e1da7ec]186 *
[062d900]187 * @param h Hash table.
188 * @param item Item to be inserted into the hash table.
[739d00a]189 */
[062d900]190void hash_table_insert(hash_table_t *h, ht_link_t *item)
[739d00a]191{
[062d900]192 assert(item);
193 assert(h && h->bucket);
194 assert(!h->apply_ongoing);
[a35b458]195
[062d900]196 size_t idx = h->op->hash(item) % h->bucket_cnt;
[a35b458]197
[062d900]198 list_append(&item->link, &h->bucket[idx]);
199 ++h->item_cnt;
200 grow_if_needed(h);
[739d00a]201}
202
[062d900]203/** Insert item into a hash table if not already present.
[739d00a]204 *
[e1da7ec]205 * @param h Hash table.
206 * @param item Item to be inserted into the hash table.
[1b20da0]207 *
208 * @return False if such an item had already been inserted.
[062d900]209 * @return True if the inserted item was the only item with such a lookup key.
[ee7736e]210 */
[062d900]211bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
[ee7736e]212{
[4f34b6a]213 assert(item);
[062d900]214 assert(h && h->bucket && h->bucket_cnt);
215 assert(h->op && h->op->hash && h->op->equal);
216 assert(!h->apply_ongoing);
[a35b458]217
[062d900]218 size_t idx = h->op->hash(item) % h->bucket_cnt;
[a35b458]219
[062d900]220 /* Check for duplicates. */
[feeac0d]221 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
[1b20da0]222 /*
223 * We could filter out items using their hashes first, but
[062d900]224 * calling equal() might very well be just as fast.
225 */
226 if (h->op->equal(cur_link, item))
227 return false;
228 }
[a35b458]229
[062d900]230 list_append(&item->link, &h->bucket[idx]);
231 ++h->item_cnt;
232 grow_if_needed(h);
[a35b458]233
[062d900]234 return true;
[ee7736e]235}
236
237/** Search hash table for an item matching keys.
238 *
[e1da7ec]239 * @param h Hash table.
240 * @param key Array of all keys needed to compute hash index.
241 *
242 * @return Matching item on success, NULL if there is no such item.
[ee7736e]243 *
244 */
[5e801dc]245ht_link_t *hash_table_find(const hash_table_t *h, const void *key)
[ee7736e]246{
[062d900]247 assert(h && h->bucket);
[a35b458]248
[0db0df2]249 size_t hash = h->op->key_hash(key);
250 size_t idx = hash % h->bucket_cnt;
[062d900]251
[feeac0d]252 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
[0db0df2]253 if (h->op->key_equal(key, hash, cur_link))
[062d900]254 return cur_link;
[ee7736e]255 }
[a35b458]256
[ee7736e]257 return NULL;
258}
259
[062d900]260/** Find the next item equal to item. */
[30f1a25]261ht_link_t *
[0db0df2]262hash_table_find_next(const hash_table_t *h, ht_link_t *item)
[062d900]263{
264 assert(item);
265 assert(h && h->bucket);
266
[963037b0]267 size_t idx = h->op->hash(item) % h->bucket_cnt;
[0db0df2]268 list_t *list = &h->bucket[idx];
269 link_t *cur = list_next(&item->link, list);
[963037b0]270
[0db0df2]271 /* Traverse the list until we reach its end. */
272 for (; cur != NULL; cur = list_next(cur, list)) {
[062d900]273 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[0db0df2]274
275 if (h->op->equal(cur_link, item))
[062d900]276 return cur_link;
277 }
278
279 return NULL;
280}
281
[ee7736e]282/** Remove all matching items from hash table.
283 *
284 * For each removed item, h->remove_callback() is called.
285 *
[e1da7ec]286 * @param h Hash table.
287 * @param key Array of keys that will be compared against items of
288 * the hash table.
[1b20da0]289 *
[062d900]290 * @return Returns the number of removed items.
[ee7736e]291 */
[5e801dc]292size_t hash_table_remove(hash_table_t *h, const void *key)
[ee7736e]293{
[062d900]294 assert(h && h->bucket);
295 assert(!h->apply_ongoing);
[a35b458]296
[0db0df2]297 size_t hash = h->op->key_hash(key);
298 size_t idx = hash % h->bucket_cnt;
[062d900]299
300 size_t removed = 0;
[a35b458]301
[062d900]302 list_foreach_safe(h->bucket[idx], cur, next) {
303 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[a35b458]304
[0db0df2]305 if (h->op->key_equal(key, hash, cur_link)) {
[062d900]306 ++removed;
[ee7736e]307 list_remove(cur);
[61eb2ce2]308
309 if (h->op->remove_callback)
310 h->op->remove_callback(cur_link);
[ee7736e]311 }
312 }
[062d900]313
314 h->item_cnt -= removed;
315 shrink_if_needed(h);
[a35b458]316
[062d900]317 return removed;
318}
319
[d1582b50]320/** Removes an item already present in the table. The item must be in the table. */
[062d900]321void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
322{
323 assert(item);
324 assert(h && h->bucket);
325 assert(link_in_use(&item->link));
326
327 list_remove(&item->link);
328 --h->item_cnt;
[61eb2ce2]329
330 if (h->op->remove_callback)
331 h->op->remove_callback(item);
[062d900]332 shrink_if_needed(h);
[ee7736e]333}
[b2951e2]334
[1ab4aca]335/** Apply function to all items in hash table.
[203a090]336 *
[e1da7ec]337 * @param h Hash table.
[1b20da0]338 * @param f Function to be applied. Return false if no more items
[062d900]339 * should be visited. The functor may only delete the supplied
[1b20da0]340 * item. It must not delete the successor of the item passed
[062d900]341 * in the first argument.
[e1da7ec]342 * @param arg Argument to be passed to the function.
[203a090]343 */
[062d900]344void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
[1b20da0]345{
[062d900]346 assert(f);
347 assert(h && h->bucket);
[a35b458]348
[062d900]349 if (h->item_cnt == 0)
350 return;
[a35b458]351
[062d900]352 h->apply_ongoing = true;
[a35b458]353
[062d900]354 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
355 list_foreach_safe(h->bucket[idx], cur, next) {
356 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
[1b20da0]357 /*
358 * The next pointer had already been saved. f() may safely
[062d900]359 * delete cur (but not next!).
[892022a1]360 */
[062d900]361 if (!f(cur_link, arg))
[251d4dd]362 goto out;
[203a090]363 }
364 }
[251d4dd]365out:
[062d900]366 h->apply_ongoing = false;
[a35b458]367
[062d900]368 shrink_if_needed(h);
369 grow_if_needed(h);
370}
371
372/** Rounds up size to the nearest suitable table size. */
373static size_t round_up_size(size_t size)
374{
375 size_t rounded_size = HT_MIN_BUCKETS;
[a35b458]376
[062d900]377 while (rounded_size < size) {
378 rounded_size = 2 * rounded_size + 1;
379 }
[a35b458]380
[062d900]381 return rounded_size;
[203a090]382}
383
[d1582b50]384/** Allocates and initializes the desired number of buckets. True if successful. */
[062d900]385static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
386{
387 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
[a35b458]388
[062d900]389 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
390 if (!buckets)
391 return false;
[a35b458]392
[062d900]393 for (size_t i = 0; i < bucket_cnt; i++)
394 list_initialize(&buckets[i]);
395
396 *pbuckets = buckets;
397 return true;
398}
399
400/** Shrinks the table if the table is only sparely populated. */
401static inline void shrink_if_needed(hash_table_t *h)
402{
403 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
[1b20da0]404 /*
405 * Keep the bucket_cnt odd (possibly also prime).
[062d900]406 * Shrink from 2n + 1 to n. Integer division discards the +1.
407 */
408 size_t new_bucket_cnt = h->bucket_cnt / 2;
409 resize(h, new_bucket_cnt);
410 }
411}
412
413/** Grows the table if table load exceeds the maximum allowed. */
414static inline void grow_if_needed(hash_table_t *h)
415{
416 /* Grow the table if the average bucket load exceeds the maximum. */
417 if (h->full_item_cnt < h->item_cnt) {
418 /* Keep the bucket_cnt odd (possibly also prime). */
419 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
420 resize(h, new_bucket_cnt);
421 }
422}
423
424/** Allocates and rehashes items to a new table. Frees the old table. */
[1b20da0]425static void resize(hash_table_t *h, size_t new_bucket_cnt)
[062d900]426{
427 assert(h && h->bucket);
428 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
[a35b458]429
[062d900]430 /* We are traversing the table and resizing would mess up the buckets. */
431 if (h->apply_ongoing)
432 return;
[a35b458]433
[062d900]434 list_t *new_buckets;
435
436 /* Leave the table as is if we cannot resize. */
437 if (!alloc_table(new_bucket_cnt, &new_buckets))
438 return;
[a35b458]439
[062d900]440 if (0 < h->item_cnt) {
441 /* Rehash all the items to the new table. */
442 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
443 list_foreach_safe(h->bucket[old_idx], cur, next) {
444 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
445
446 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
447 list_remove(cur);
448 list_append(cur, &new_buckets[new_idx]);
449 }
450 }
451 }
[a35b458]452
[062d900]453 free(h->bucket);
454 h->bucket = new_buckets;
455 h->bucket_cnt = new_bucket_cnt;
456 h->full_item_cnt = h->max_load * h->bucket_cnt;
457}
458
[fadd381]459/** @}
[b2951e2]460 */
Note: See TracBrowser for help on using the repository browser.