source: mainline/uspace/lib/c/generic/adt/hash_table.c@ a85d5c6

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since a85d5c6 was 36795edf, checked in by Martin Decky <martin@…>, 4 years ago

Improve lists and other data structures

Provide a more standards-compliant member_to_inst implementation that uses
offsetof. Avoid potential undefined behavior in list_foreach and
list_foreach_rev by avoiding assigning an unaligned pointer value. Use
size_t instead of unsigned long for list length.

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/*
2 * Copyright (c) 2008 Jakub Jermar
3 * Copyright (c) 2012 Adam Hraska
4 *
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/** @addtogroup libc
32 * @{
33 */
34/** @file
35 */
36
37/*
38 * This is an implementation of a generic resizable chained hash table.
39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
43 *
44 * This policy produces prime table sizes for the first five resizes
45 * and generally produces table sizes which are either prime or
46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
49 */
50
51#include <adt/hash_table.h>
52#include <adt/list.h>
53#include <assert.h>
54#include <member.h>
55#include <stdlib.h>
56#include <str.h>
57
58/* Optimal initial bucket count. See comment above. */
59#define HT_MIN_BUCKETS 89
60/* The table is resized when the average load per bucket exceeds this number. */
61#define HT_MAX_LOAD 2
62
63static size_t round_up_size(size_t);
64static bool alloc_table(size_t, list_t **);
65static void clear_items(hash_table_t *);
66static void resize(hash_table_t *, size_t);
67static void grow_if_needed(hash_table_t *);
68static void shrink_if_needed(hash_table_t *);
69
70/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
71static void nop_remove_callback(ht_link_t *item)
72{
73 /* no-op */
74}
75
76/** Create chained hash table.
77 *
78 * @param h Hash table structure. Will be initialized by this call.
79 * @param init_size Initial desired number of hash table buckets. Pass zero
80 * if you want the default initial size.
81 * @param max_load The table is resized when the average load per bucket
82 * exceeds this number. Pass zero if you want the default.
83 * @param op Hash table operations structure. remove_callback()
84 * is optional and can be NULL if no action is to be taken
85 * upon removal. equal() is optional if and only if
86 * hash_table_insert_unique() will never be invoked.
87 * All other operations are mandatory.
88 *
89 * @return True on success
90 *
91 */
92bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
93 hash_table_ops_t *op)
94{
95 assert(h);
96 assert(op && op->hash && op->key_hash && op->key_equal);
97
98 /* Check for compulsory ops. */
99 if (!op || !op->hash || !op->key_hash || !op->key_equal)
100 return false;
101
102 h->bucket_cnt = round_up_size(init_size);
103
104 if (!alloc_table(h->bucket_cnt, &h->bucket))
105 return false;
106
107 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
108 h->item_cnt = 0;
109 h->op = op;
110 h->full_item_cnt = h->max_load * h->bucket_cnt;
111 h->apply_ongoing = false;
112
113 if (h->op->remove_callback == NULL) {
114 h->op->remove_callback = nop_remove_callback;
115 }
116
117 return true;
118}
119
120/** Destroy a hash table instance.
121 *
122 * @param h Hash table to be destroyed.
123 *
124 */
125void hash_table_destroy(hash_table_t *h)
126{
127 assert(h && h->bucket);
128 assert(!h->apply_ongoing);
129
130 clear_items(h);
131
132 free(h->bucket);
133
134 h->bucket = NULL;
135 h->bucket_cnt = 0;
136}
137
138/** Returns true if there are no items in the table. */
139bool hash_table_empty(hash_table_t *h)
140{
141 assert(h && h->bucket);
142 return h->item_cnt == 0;
143}
144
145/** Returns the number of items in the table. */
146size_t hash_table_size(hash_table_t *h)
147{
148 assert(h && h->bucket);
149 return h->item_cnt;
150}
151
152/** Remove all elements from the hash table
153 *
154 * @param h Hash table to be cleared
155 */
156void hash_table_clear(hash_table_t *h)
157{
158 assert(h && h->bucket);
159 assert(!h->apply_ongoing);
160
161 clear_items(h);
162
163 /* Shrink the table to its minimum size if possible. */
164 if (HT_MIN_BUCKETS < h->bucket_cnt) {
165 resize(h, HT_MIN_BUCKETS);
166 }
167}
168
169/** Unlinks and removes all items but does not resize. */
170static void clear_items(hash_table_t *h)
171{
172 if (h->item_cnt == 0)
173 return;
174
175 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
176 list_foreach_safe(h->bucket[idx], cur, next) {
177 assert(cur);
178 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
179
180 list_remove(cur);
181 h->op->remove_callback(cur_link);
182 }
183 }
184
185 h->item_cnt = 0;
186}
187
188/** Insert item into a hash table.
189 *
190 * @param h Hash table.
191 * @param item Item to be inserted into the hash table.
192 */
193void hash_table_insert(hash_table_t *h, ht_link_t *item)
194{
195 assert(item);
196 assert(h && h->bucket);
197 assert(!h->apply_ongoing);
198
199 size_t idx = h->op->hash(item) % h->bucket_cnt;
200
201 list_append(&item->link, &h->bucket[idx]);
202 ++h->item_cnt;
203 grow_if_needed(h);
204}
205
206/** Insert item into a hash table if not already present.
207 *
208 * @param h Hash table.
209 * @param item Item to be inserted into the hash table.
210 *
211 * @return False if such an item had already been inserted.
212 * @return True if the inserted item was the only item with such a lookup key.
213 */
214bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
215{
216 assert(item);
217 assert(h && h->bucket && h->bucket_cnt);
218 assert(h->op && h->op->hash && h->op->equal);
219 assert(!h->apply_ongoing);
220
221 size_t idx = h->op->hash(item) % h->bucket_cnt;
222
223 /* Check for duplicates. */
224 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
225 /*
226 * We could filter out items using their hashes first, but
227 * calling equal() might very well be just as fast.
228 */
229 if (h->op->equal(cur_link, item))
230 return false;
231 }
232
233 list_append(&item->link, &h->bucket[idx]);
234 ++h->item_cnt;
235 grow_if_needed(h);
236
237 return true;
238}
239
240/** Search hash table for an item matching keys.
241 *
242 * @param h Hash table.
243 * @param key Array of all keys needed to compute hash index.
244 *
245 * @return Matching item on success, NULL if there is no such item.
246 *
247 */
248ht_link_t *hash_table_find(const hash_table_t *h, const void *key)
249{
250 assert(h && h->bucket);
251
252 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
253
254 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
255 /*
256 * Is this is the item we are looking for? We could have first
257 * checked if the hashes match but op->key_equal() may very well be
258 * just as fast as op->hash().
259 */
260 if (h->op->key_equal(key, cur_link)) {
261 return cur_link;
262 }
263 }
264
265 return NULL;
266}
267
268/** Find the next item equal to item. */
269ht_link_t *
270hash_table_find_next(const hash_table_t *h, ht_link_t *first, ht_link_t *item)
271{
272 assert(item);
273 assert(h && h->bucket);
274
275 size_t idx = h->op->hash(item) % h->bucket_cnt;
276
277 /* Traverse the circular list until we reach the starting item again. */
278 for (link_t *cur = item->link.next; cur != &first->link;
279 cur = cur->next) {
280 assert(cur);
281
282 if (cur == &h->bucket[idx].head)
283 continue;
284
285 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
286 /*
287 * Is this is the item we are looking for? We could have first
288 * checked if the hashes match but op->equal() may very well be
289 * just as fast as op->hash().
290 */
291 if (h->op->equal(cur_link, item)) {
292 return cur_link;
293 }
294 }
295
296 return NULL;
297}
298
299/** Remove all matching items from hash table.
300 *
301 * For each removed item, h->remove_callback() is called.
302 *
303 * @param h Hash table.
304 * @param key Array of keys that will be compared against items of
305 * the hash table.
306 *
307 * @return Returns the number of removed items.
308 */
309size_t hash_table_remove(hash_table_t *h, const void *key)
310{
311 assert(h && h->bucket);
312 assert(!h->apply_ongoing);
313
314 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
315
316 size_t removed = 0;
317
318 list_foreach_safe(h->bucket[idx], cur, next) {
319 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
320
321 if (h->op->key_equal(key, cur_link)) {
322 ++removed;
323 list_remove(cur);
324 h->op->remove_callback(cur_link);
325 }
326 }
327
328 h->item_cnt -= removed;
329 shrink_if_needed(h);
330
331 return removed;
332}
333
334/** Removes an item already present in the table. The item must be in the table. */
335void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
336{
337 assert(item);
338 assert(h && h->bucket);
339 assert(link_in_use(&item->link));
340
341 list_remove(&item->link);
342 --h->item_cnt;
343 h->op->remove_callback(item);
344 shrink_if_needed(h);
345}
346
347/** Apply function to all items in hash table.
348 *
349 * @param h Hash table.
350 * @param f Function to be applied. Return false if no more items
351 * should be visited. The functor may only delete the supplied
352 * item. It must not delete the successor of the item passed
353 * in the first argument.
354 * @param arg Argument to be passed to the function.
355 */
356void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
357{
358 assert(f);
359 assert(h && h->bucket);
360
361 if (h->item_cnt == 0)
362 return;
363
364 h->apply_ongoing = true;
365
366 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
367 list_foreach_safe(h->bucket[idx], cur, next) {
368 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
369 /*
370 * The next pointer had already been saved. f() may safely
371 * delete cur (but not next!).
372 */
373 if (!f(cur_link, arg))
374 goto out;
375 }
376 }
377out:
378 h->apply_ongoing = false;
379
380 shrink_if_needed(h);
381 grow_if_needed(h);
382}
383
384/** Rounds up size to the nearest suitable table size. */
385static size_t round_up_size(size_t size)
386{
387 size_t rounded_size = HT_MIN_BUCKETS;
388
389 while (rounded_size < size) {
390 rounded_size = 2 * rounded_size + 1;
391 }
392
393 return rounded_size;
394}
395
396/** Allocates and initializes the desired number of buckets. True if successful. */
397static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
398{
399 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
400
401 list_t *buckets = malloc(bucket_cnt * sizeof(list_t));
402 if (!buckets)
403 return false;
404
405 for (size_t i = 0; i < bucket_cnt; i++)
406 list_initialize(&buckets[i]);
407
408 *pbuckets = buckets;
409 return true;
410}
411
412/** Shrinks the table if the table is only sparely populated. */
413static inline void shrink_if_needed(hash_table_t *h)
414{
415 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
416 /*
417 * Keep the bucket_cnt odd (possibly also prime).
418 * Shrink from 2n + 1 to n. Integer division discards the +1.
419 */
420 size_t new_bucket_cnt = h->bucket_cnt / 2;
421 resize(h, new_bucket_cnt);
422 }
423}
424
425/** Grows the table if table load exceeds the maximum allowed. */
426static inline void grow_if_needed(hash_table_t *h)
427{
428 /* Grow the table if the average bucket load exceeds the maximum. */
429 if (h->full_item_cnt < h->item_cnt) {
430 /* Keep the bucket_cnt odd (possibly also prime). */
431 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
432 resize(h, new_bucket_cnt);
433 }
434}
435
436/** Allocates and rehashes items to a new table. Frees the old table. */
437static void resize(hash_table_t *h, size_t new_bucket_cnt)
438{
439 assert(h && h->bucket);
440 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
441
442 /* We are traversing the table and resizing would mess up the buckets. */
443 if (h->apply_ongoing)
444 return;
445
446 list_t *new_buckets;
447
448 /* Leave the table as is if we cannot resize. */
449 if (!alloc_table(new_bucket_cnt, &new_buckets))
450 return;
451
452 if (0 < h->item_cnt) {
453 /* Rehash all the items to the new table. */
454 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
455 list_foreach_safe(h->bucket[old_idx], cur, next) {
456 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
457
458 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
459 list_remove(cur);
460 list_append(cur, &new_buckets[new_idx]);
461 }
462 }
463 }
464
465 free(h->bucket);
466 h->bucket = new_buckets;
467 h->bucket_cnt = new_bucket_cnt;
468 h->full_item_cnt = h->max_load * h->bucket_cnt;
469}
470
471/** @}
472 */
Note: See TracBrowser for help on using the repository browser.