source: mainline/kernel/generic/src/adt/hash_table.c@ df6ded8

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since df6ded8 was 82cbf8c6, checked in by Jakub Jermar <jakub@…>, 8 years ago

Replace the old hash table implementation in the kernel with the newer one

This replaces the original hash table implementation with the resizable one
already used in uspace. Along the way, the IRQ hash table code was streamlined
and cleaned up.

  • Property mode set to 100644
File size: 12.4 KB
Line 
1/*
2 * Copyright (c) 2008 Jakub Jermar
3 * Copyright (c) 2012 Adam Hraska
4 *
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/** @addtogroup libc
32 * @{
33 */
34/** @file
35 */
36
37/*
38 * This is an implementation of a generic resizable chained hash table.
39 *
40 * The table grows to 2*n+1 buckets each time, starting at n == 89,
41 * per Thomas Wang's recommendation:
42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm
43 *
44 * This policy produces prime table sizes for the first five resizes
45 * and generally produces table sizes which are either prime or
46 * have fairly large (prime/odd) divisors. Having a prime table size
47 * mitigates the use of suboptimal hash functions and distributes
48 * items over the whole table.
49 */
50
51#include <adt/hash_table.h>
52#include <adt/list.h>
53#include <mm/slab.h>
54#include <assert.h>
55#include <str.h>
56
57/* Optimal initial bucket count. See comment above. */
58#define HT_MIN_BUCKETS 89
59/* The table is resized when the average load per bucket exceeds this number. */
60#define HT_MAX_LOAD 2
61
62
63static size_t round_up_size(size_t);
64static bool alloc_table(size_t, list_t **);
65static void clear_items(hash_table_t *);
66static void resize(hash_table_t *, size_t);
67static void grow_if_needed(hash_table_t *);
68static void shrink_if_needed(hash_table_t *);
69
70/* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
71static void nop_remove_callback(ht_link_t *item)
72{
73 /* no-op */
74}
75
76
77/** Create chained hash table.
78 *
79 * @param h Hash table structure. Will be initialized by this call.
80 * @param init_size Initial desired number of hash table buckets. Pass zero
81 * if you want the default initial size.
82 * @param max_load The table is resized when the average load per bucket
83 * exceeds this number. Pass zero if you want the default.
84 * @param op Hash table operations structure. remove_callback()
85 * is optional and can be NULL if no action is to be taken
86 * upon removal. equal() is optional if and only if
87 * hash_table_insert_unique() will never be invoked.
88 * All other operations are mandatory.
89 *
90 * @return True on success
91 *
92 */
93bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
94 hash_table_ops_t *op)
95{
96 assert(h);
97 assert(op && op->hash && op->key_hash && op->key_equal);
98
99 /* Check for compulsory ops. */
100 if (!op || !op->hash || !op->key_hash || !op->key_equal)
101 return false;
102
103 h->bucket_cnt = round_up_size(init_size);
104
105 if (!alloc_table(h->bucket_cnt, &h->bucket))
106 return false;
107
108 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
109 h->item_cnt = 0;
110 h->op = op;
111 h->full_item_cnt = h->max_load * h->bucket_cnt;
112 h->apply_ongoing = false;
113
114 if (h->op->remove_callback == NULL) {
115 h->op->remove_callback = nop_remove_callback;
116 }
117
118 return true;
119}
120
121/** Destroy a hash table instance.
122 *
123 * @param h Hash table to be destroyed.
124 *
125 */
126void hash_table_destroy(hash_table_t *h)
127{
128 assert(h && h->bucket);
129 assert(!h->apply_ongoing);
130
131 clear_items(h);
132
133 free(h->bucket);
134
135 h->bucket = NULL;
136 h->bucket_cnt = 0;
137}
138
139/** Returns true if there are no items in the table. */
140bool hash_table_empty(hash_table_t *h)
141{
142 assert(h && h->bucket);
143 return h->item_cnt == 0;
144}
145
146/** Returns the number of items in the table. */
147size_t hash_table_size(hash_table_t *h)
148{
149 assert(h && h->bucket);
150 return h->item_cnt;
151}
152
153/** Remove all elements from the hash table
154 *
155 * @param h Hash table to be cleared
156 */
157void hash_table_clear(hash_table_t *h)
158{
159 assert(h && h->bucket);
160 assert(!h->apply_ongoing);
161
162 clear_items(h);
163
164 /* Shrink the table to its minimum size if possible. */
165 if (HT_MIN_BUCKETS < h->bucket_cnt) {
166 resize(h, HT_MIN_BUCKETS);
167 }
168}
169
170/** Unlinks and removes all items but does not resize. */
171static void clear_items(hash_table_t *h)
172{
173 if (h->item_cnt == 0)
174 return;
175
176 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
177 list_foreach_safe(h->bucket[idx], cur, next) {
178 assert(cur);
179 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
180
181 list_remove(cur);
182 h->op->remove_callback(cur_link);
183 }
184 }
185
186 h->item_cnt = 0;
187}
188
189/** Insert item into a hash table.
190 *
191 * @param h Hash table.
192 * @param item Item to be inserted into the hash table.
193 */
194void hash_table_insert(hash_table_t *h, ht_link_t *item)
195{
196 assert(item);
197 assert(h && h->bucket);
198 assert(!h->apply_ongoing);
199
200 size_t idx = h->op->hash(item) % h->bucket_cnt;
201
202 list_append(&item->link, &h->bucket[idx]);
203 ++h->item_cnt;
204 grow_if_needed(h);
205}
206
207
208/** Insert item into a hash table if not already present.
209 *
210 * @param h Hash table.
211 * @param item Item to be inserted into the hash table.
212 *
213 * @return False if such an item had already been inserted.
214 * @return True if the inserted item was the only item with such a lookup key.
215 */
216bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
217{
218 assert(item);
219 assert(h && h->bucket && h->bucket_cnt);
220 assert(h->op && h->op->hash && h->op->equal);
221 assert(!h->apply_ongoing);
222
223 size_t idx = h->op->hash(item) % h->bucket_cnt;
224
225 /* Check for duplicates. */
226 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
227 /*
228 * We could filter out items using their hashes first, but
229 * calling equal() might very well be just as fast.
230 */
231 if (h->op->equal(cur_link, item))
232 return false;
233 }
234
235 list_append(&item->link, &h->bucket[idx]);
236 ++h->item_cnt;
237 grow_if_needed(h);
238
239 return true;
240}
241
242/** Search hash table for an item matching keys.
243 *
244 * @param h Hash table.
245 * @param key Array of all keys needed to compute hash index.
246 *
247 * @return Matching item on success, NULL if there is no such item.
248 *
249 */
250ht_link_t *hash_table_find(const hash_table_t *h, void *key)
251{
252 assert(h && h->bucket);
253
254 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
255
256 list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
257 /*
258 * Is this is the item we are looking for? We could have first
259 * checked if the hashes match but op->key_equal() may very well be
260 * just as fast as op->hash().
261 */
262 if (h->op->key_equal(key, cur_link)) {
263 return cur_link;
264 }
265 }
266
267 return NULL;
268}
269
270/** Find the next item equal to item. */
271ht_link_t *hash_table_find_next(const hash_table_t *h, ht_link_t *item)
272{
273 assert(item);
274 assert(h && h->bucket);
275
276 /* Traverse the circular list until we reach the starting item again. */
277 for (link_t *cur = item->link.next; cur != &item->link; cur = cur->next) {
278 assert(cur);
279 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
280 /*
281 * Is this is the item we are looking for? We could have first
282 * checked if the hashes match but op->equal() may very well be
283 * just as fast as op->hash().
284 */
285 if (h->op->equal(cur_link, item)) {
286 return cur_link;
287 }
288 }
289
290 return NULL;
291}
292
293/** Remove all matching items from hash table.
294 *
295 * For each removed item, h->remove_callback() is called.
296 *
297 * @param h Hash table.
298 * @param key Array of keys that will be compared against items of
299 * the hash table.
300 *
301 * @return Returns the number of removed items.
302 */
303size_t hash_table_remove(hash_table_t *h, void *key)
304{
305 assert(h && h->bucket);
306 assert(!h->apply_ongoing);
307
308 size_t idx = h->op->key_hash(key) % h->bucket_cnt;
309
310 size_t removed = 0;
311
312 list_foreach_safe(h->bucket[idx], cur, next) {
313 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
314
315 if (h->op->key_equal(key, cur_link)) {
316 ++removed;
317 list_remove(cur);
318 h->op->remove_callback(cur_link);
319 }
320 }
321
322 h->item_cnt -= removed;
323 shrink_if_needed(h);
324
325 return removed;
326}
327
328/** Removes an item already present in the table. The item must be in the table.*/
329void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
330{
331 assert(item);
332 assert(h && h->bucket);
333 assert(link_in_use(&item->link));
334
335 list_remove(&item->link);
336 --h->item_cnt;
337 h->op->remove_callback(item);
338 shrink_if_needed(h);
339}
340
341/** Apply function to all items in hash table.
342 *
343 * @param h Hash table.
344 * @param f Function to be applied. Return false if no more items
345 * should be visited. The functor may only delete the supplied
346 * item. It must not delete the successor of the item passed
347 * in the first argument.
348 * @param arg Argument to be passed to the function.
349 */
350void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
351{
352 assert(f);
353 assert(h && h->bucket);
354
355 if (h->item_cnt == 0)
356 return;
357
358 h->apply_ongoing = true;
359
360 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
361 list_foreach_safe(h->bucket[idx], cur, next) {
362 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
363 /*
364 * The next pointer had already been saved. f() may safely
365 * delete cur (but not next!).
366 */
367 if (!f(cur_link, arg))
368 goto out;
369 }
370 }
371out:
372 h->apply_ongoing = false;
373
374 shrink_if_needed(h);
375 grow_if_needed(h);
376}
377
378/** Rounds up size to the nearest suitable table size. */
379static size_t round_up_size(size_t size)
380{
381 size_t rounded_size = HT_MIN_BUCKETS;
382
383 while (rounded_size < size) {
384 rounded_size = 2 * rounded_size + 1;
385 }
386
387 return rounded_size;
388}
389
390/** Allocates and initializes the desired number of buckets. True if successful.*/
391static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
392{
393 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
394
395 list_t *buckets = malloc(bucket_cnt * sizeof(list_t), FRAME_ATOMIC);
396 if (!buckets)
397 return false;
398
399 for (size_t i = 0; i < bucket_cnt; i++)
400 list_initialize(&buckets[i]);
401
402 *pbuckets = buckets;
403 return true;
404}
405
406
407/** Shrinks the table if the table is only sparely populated. */
408static inline void shrink_if_needed(hash_table_t *h)
409{
410 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
411 /*
412 * Keep the bucket_cnt odd (possibly also prime).
413 * Shrink from 2n + 1 to n. Integer division discards the +1.
414 */
415 size_t new_bucket_cnt = h->bucket_cnt / 2;
416 resize(h, new_bucket_cnt);
417 }
418}
419
420/** Grows the table if table load exceeds the maximum allowed. */
421static inline void grow_if_needed(hash_table_t *h)
422{
423 /* Grow the table if the average bucket load exceeds the maximum. */
424 if (h->full_item_cnt < h->item_cnt) {
425 /* Keep the bucket_cnt odd (possibly also prime). */
426 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
427 resize(h, new_bucket_cnt);
428 }
429}
430
431/** Allocates and rehashes items to a new table. Frees the old table. */
432static void resize(hash_table_t *h, size_t new_bucket_cnt)
433{
434 assert(h && h->bucket);
435 assert(HT_MIN_BUCKETS <= new_bucket_cnt);
436
437 /* We are traversing the table and resizing would mess up the buckets. */
438 if (h->apply_ongoing)
439 return;
440
441 list_t *new_buckets;
442
443 /* Leave the table as is if we cannot resize. */
444 if (!alloc_table(new_bucket_cnt, &new_buckets))
445 return;
446
447 if (0 < h->item_cnt) {
448 /* Rehash all the items to the new table. */
449 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
450 list_foreach_safe(h->bucket[old_idx], cur, next) {
451 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
452
453 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
454 list_remove(cur);
455 list_append(cur, &new_buckets[new_idx]);
456 }
457 }
458 }
459
460 free(h->bucket);
461 h->bucket = new_buckets;
462 h->bucket_cnt = new_bucket_cnt;
463 h->full_item_cnt = h->max_load * h->bucket_cnt;
464}
465
466
467/** @}
468 */
Note: See TracBrowser for help on using the repository browser.