Changeset 4c53333 in mainline for uspace/lib/c/generic/adt/hash_table.c
- Timestamp:
- 2013-07-11T08:21:10Z (13 years ago)
- Branches:
- lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
- Children:
- 64e63ce1
- Parents:
- 80445cf (diff), c8bb1633 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - File:
-
- 1 edited
-
uspace/lib/c/generic/adt/hash_table.c (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/generic/adt/hash_table.c
r80445cf r4c53333 1 1 /* 2 2 * Copyright (c) 2008 Jakub Jermar 3 * Copyright (c) 2012 Adam Hraska 4 * 3 5 * All rights reserved. 4 6 * … … 34 36 35 37 /* 36 * This is an implementation of generic chained hash table. 38 * This is an implementation of a generic resizable chained hash table. 39 * 40 * The table grows to 2*n+1 buckets each time, starting at n == 89, 41 * per Thomas Wang's recommendation: 42 * http://www.concentric.net/~Ttwang/tech/hashsize.htm 43 * 44 * This policy produces prime table sizes for the first five resizes 45 * and generally produces table sizes which are either prime or 46 * have fairly large (prime/odd) divisors. Having a prime table size 47 * mitigates the use of suboptimal hash functions and distributes 48 * items over the whole table. 37 49 */ 38 50 … … 44 56 #include <str.h> 45 57 58 /* Optimal initial bucket count. See comment above. */ 59 #define HT_MIN_BUCKETS 89 60 /* The table is resized when the average load per bucket exceeds this number. */ 61 #define HT_MAX_LOAD 2 62 63 64 static size_t round_up_size(size_t); 65 static bool alloc_table(size_t, list_t **); 66 static void clear_items(hash_table_t *); 67 static void resize(hash_table_t *, size_t); 68 static void grow_if_needed(hash_table_t *); 69 static void shrink_if_needed(hash_table_t *); 70 71 /* Dummy do nothing callback to invoke in place of remove_callback == NULL. */ 72 static void nop_remove_callback(ht_link_t *item) 73 { 74 /* no-op */ 75 } 76 77 46 78 /** Create chained hash table. 47 79 * 48 80 * @param h Hash table structure. Will be initialized by this call. 49 * @param m Number of hash table buckets. 50 * @param max_keys Maximal number of keys needed to identify an item. 51 * @param op Hash table operations structure. 81 * @param init_size Initial desired number of hash table buckets. Pass zero 82 * if you want the default initial size. 83 * @param max_load The table is resized when the average load per bucket 84 * exceeds this number. 
Pass zero if you want the default. 85 * @param op Hash table operations structure. remove_callback() 86 * is optional and can be NULL if no action is to be taken 87 * upon removal. equal() is optional if and only if 88 * hash_table_insert_unique() will never be invoked. 89 * All other operations are mandatory. 52 90 * 53 91 * @return True on success 54 92 * 55 93 */ 56 bool hash_table_create(hash_table_t *h, hash_count_t m, hash_count_t max_keys,57 hash_table_op erations_t *op)94 bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load, 95 hash_table_ops_t *op) 58 96 { 59 97 assert(h); 60 assert(op && op->hash && op->compare); 61 assert(max_keys > 0); 62 63 h->entry = malloc(m * sizeof(list_t)); 64 if (!h->entry) 98 assert(op && op->hash && op->key_hash && op->key_equal); 99 100 /* Check for compulsory ops. */ 101 if (!op || !op->hash || !op->key_hash || !op->key_equal) 65 102 return false; 66 103 67 memset((void *) h->entry, 0, m * sizeof(list_t)); 68 69 hash_count_t i; 70 for (i = 0; i < m; i++) 71 list_initialize(&h->entry[i]); 72 73 h->entries = m; 74 h->max_keys = max_keys; 104 h->bucket_cnt = round_up_size(init_size); 105 106 if (!alloc_table(h->bucket_cnt, &h->bucket)) 107 return false; 108 109 h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load; 110 h->item_cnt = 0; 75 111 h->op = op; 112 h->full_item_cnt = h->max_load * h->bucket_cnt; 113 h->apply_ongoing = false; 114 115 if (h->op->remove_callback == NULL) { 116 h->op->remove_callback = nop_remove_callback; 117 } 76 118 77 119 return true; 78 120 } 79 121 122 /** Destroy a hash table instance. 123 * 124 * @param h Hash table to be destroyed. 125 * 126 */ 127 void hash_table_destroy(hash_table_t *h) 128 { 129 assert(h && h->bucket); 130 assert(!h->apply_ongoing); 131 132 clear_items(h); 133 134 free(h->bucket); 135 136 h->bucket = 0; 137 h->bucket_cnt = 0; 138 } 139 140 /** Returns true if there are no items in the table. 
*/ 141 bool hash_table_empty(hash_table_t *h) 142 { 143 assert(h && h->bucket); 144 return h->item_cnt == 0; 145 } 146 147 /** Returns the number of items in the table. */ 148 size_t hash_table_size(hash_table_t *h) 149 { 150 assert(h && h->bucket); 151 return h->item_cnt; 152 } 153 80 154 /** Remove all elements from the hash table 81 155 * … … 84 158 void hash_table_clear(hash_table_t *h) 85 159 { 86 for (hash_count_t chain = 0; chain < h->entries; ++chain) { 87 link_t *cur; 88 link_t *next; 89 90 for (cur = h->entry[chain].head.next; 91 cur != &h->entry[chain].head; 92 cur = next) { 93 next = cur->next; 160 assert(h && h->bucket); 161 assert(!h->apply_ongoing); 162 163 clear_items(h); 164 165 /* Shrink the table to its minimum size if possible. */ 166 if (HT_MIN_BUCKETS < h->bucket_cnt) { 167 resize(h, HT_MIN_BUCKETS); 168 } 169 } 170 171 /** Unlinks and removes all items but does not resize. */ 172 static void clear_items(hash_table_t *h) 173 { 174 if (h->item_cnt == 0) 175 return; 176 177 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) { 178 list_foreach_safe(h->bucket[idx], cur, next) { 179 assert(cur); 180 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 181 94 182 list_remove(cur); 95 h->op->remove_callback(cur); 96 } 97 } 98 } 99 100 /** Destroy a hash table instance. 101 * 102 * @param h Hash table to be destroyed. 103 * 104 */ 105 void hash_table_destroy(hash_table_t *h) 106 { 107 assert(h); 108 assert(h->entry); 109 110 free(h->entry); 183 h->op->remove_callback(cur_link); 184 } 185 } 186 187 h->item_cnt = 0; 111 188 } 112 189 … … 117 194 * @param item Item to be inserted into the hash table. 
118 195 */ 119 void hash_table_insert(hash_table_t *h, unsigned long key[],link_t *item)196 void hash_table_insert(hash_table_t *h, ht_link_t *item) 120 197 { 121 198 assert(item); 122 assert(h && h->op && h->op->hash && h->op->compare); 123 124 hash_index_t chain = h->op->hash(key); 125 assert(chain < h->entries); 126 127 list_append(item, &h->entry[chain]); 199 assert(h && h->bucket); 200 assert(!h->apply_ongoing); 201 202 size_t idx = h->op->hash(item) % h->bucket_cnt; 203 204 list_append(&item->link, &h->bucket[idx]); 205 ++h->item_cnt; 206 grow_if_needed(h); 207 } 208 209 210 /** Insert item into a hash table if not already present. 211 * 212 * @param h Hash table. 213 * @param key Array of all keys necessary to compute hash index. 214 * @param item Item to be inserted into the hash table. 215 * 216 * @return False if such an item had already been inserted. 217 * @return True if the inserted item was the only item with such a lookup key. 218 */ 219 bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item) 220 { 221 assert(item); 222 assert(h && h->bucket && h->bucket_cnt); 223 assert(h->op && h->op->hash && h->op->equal); 224 assert(!h->apply_ongoing); 225 226 size_t idx = h->op->hash(item) % h->bucket_cnt; 227 228 /* Check for duplicates. */ 229 list_foreach(h->bucket[idx], cur) { 230 /* 231 * We could filter out items using their hashes first, but 232 * calling equal() might very well be just as fast. 
233 */ 234 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 235 if (h->op->equal(cur_link, item)) 236 return false; 237 } 238 239 list_append(&item->link, &h->bucket[idx]); 240 ++h->item_cnt; 241 grow_if_needed(h); 242 243 return true; 128 244 } 129 245 … … 136 252 * 137 253 */ 138 link_t *hash_table_find(hash_table_t *h, unsigned long key[]) 139 { 140 assert(h && h->op && h->op->hash && h->op->compare); 141 142 hash_index_t chain = h->op->hash(key); 143 assert(chain < h->entries); 144 145 list_foreach(h->entry[chain], cur) { 146 if (h->op->compare(key, h->max_keys, cur)) { 147 /* 148 * The entry is there. 149 */ 150 return cur; 151 } 152 } 153 254 ht_link_t *hash_table_find(const hash_table_t *h, void *key) 255 { 256 assert(h && h->bucket); 257 258 size_t idx = h->op->key_hash(key) % h->bucket_cnt; 259 260 list_foreach(h->bucket[idx], cur) { 261 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 262 /* 263 * Is this is the item we are looking for? We could have first 264 * checked if the hashes match but op->key_equal() may very well be 265 * just as fast as op->hash(). 266 */ 267 if (h->op->key_equal(key, cur_link)) { 268 return cur_link; 269 } 270 } 271 272 return NULL; 273 } 274 275 /** Find the next item equal to item. */ 276 ht_link_t *hash_table_find_next(const hash_table_t *h, ht_link_t *item) 277 { 278 assert(item); 279 assert(h && h->bucket); 280 281 /* Traverse the circular list until we reach the starting item again. */ 282 for (link_t *cur = item->link.next; cur != &item->link; cur = cur->next) { 283 assert(cur); 284 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 285 /* 286 * Is this is the item we are looking for? We could have first 287 * checked if the hashes match but op->equal() may very well be 288 * just as fast as op->hash(). 289 */ 290 if (h->op->equal(cur_link, item)) { 291 return cur_link; 292 } 293 } 294 154 295 return NULL; 155 296 } … … 163 304 * the hash table. 
164 305 * @param keys Number of keys in the 'key' array. 165 * 166 */ 167 void hash_table_remove(hash_table_t *h, unsigned long key[], hash_count_t keys) 168 { 169 assert(h && h->op && h->op->hash && h->op->compare && 170 h->op->remove_callback); 171 assert(keys <= h->max_keys); 172 173 if (keys == h->max_keys) { 174 /* 175 * All keys are known, hash_table_find() can be used to find the 176 * entry. 306 * 307 * @return Returns the number of removed items. 308 */ 309 size_t hash_table_remove(hash_table_t *h, void *key) 310 { 311 assert(h && h->bucket); 312 assert(!h->apply_ongoing); 313 314 size_t idx = h->op->key_hash(key) % h->bucket_cnt; 315 316 size_t removed = 0; 317 318 list_foreach_safe(h->bucket[idx], cur, next) { 319 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 320 321 if (h->op->key_equal(key, cur_link)) { 322 ++removed; 323 list_remove(cur); 324 h->op->remove_callback(cur_link); 325 } 326 } 327 328 h->item_cnt -= removed; 329 shrink_if_needed(h); 330 331 return removed; 332 } 333 334 /** Removes an item already present in the table. The item must be in the table.*/ 335 void hash_table_remove_item(hash_table_t *h, ht_link_t *item) 336 { 337 assert(item); 338 assert(h && h->bucket); 339 assert(link_in_use(&item->link)); 340 341 list_remove(&item->link); 342 --h->item_cnt; 343 h->op->remove_callback(item); 344 shrink_if_needed(h); 345 } 346 347 /** Apply function to all items in hash table. 348 * 349 * @param h Hash table. 350 * @param f Function to be applied. Return false if no more items 351 * should be visited. The functor may only delete the supplied 352 * item. It must not delete the successor of the item passed 353 * in the first argument. 354 * @param arg Argument to be passed to the function. 
355 */ 356 void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg) 357 { 358 assert(f); 359 assert(h && h->bucket); 360 361 if (h->item_cnt == 0) 362 return; 363 364 h->apply_ongoing = true; 365 366 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) { 367 list_foreach_safe(h->bucket[idx], cur, next) { 368 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 369 /* 370 * The next pointer had already been saved. f() may safely 371 * delete cur (but not next!). 372 */ 373 if (!f(cur_link, arg)) 374 return; 375 } 376 } 377 378 h->apply_ongoing = false; 379 380 shrink_if_needed(h); 381 grow_if_needed(h); 382 } 383 384 /** Rounds up size to the nearest suitable table size. */ 385 static size_t round_up_size(size_t size) 386 { 387 size_t rounded_size = HT_MIN_BUCKETS; 388 389 while (rounded_size < size) { 390 rounded_size = 2 * rounded_size + 1; 391 } 392 393 return rounded_size; 394 } 395 396 /** Allocates and initializes the desired number of buckets. True if successful.*/ 397 static bool alloc_table(size_t bucket_cnt, list_t **pbuckets) 398 { 399 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt); 400 401 list_t *buckets = malloc(bucket_cnt * sizeof(list_t)); 402 if (!buckets) 403 return false; 404 405 for (size_t i = 0; i < bucket_cnt; i++) 406 list_initialize(&buckets[i]); 407 408 *pbuckets = buckets; 409 return true; 410 } 411 412 413 /** Shrinks the table if the table is only sparely populated. */ 414 static inline void shrink_if_needed(hash_table_t *h) 415 { 416 if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) { 417 /* 418 * Keep the bucket_cnt odd (possibly also prime). 419 * Shrink from 2n + 1 to n. Integer division discards the +1. 
177 420 */ 178 179 link_t *cur = hash_table_find(h, key); 180 if (cur) { 181 list_remove(cur); 182 h->op->remove_callback(cur); 183 } 184 421 size_t new_bucket_cnt = h->bucket_cnt / 2; 422 resize(h, new_bucket_cnt); 423 } 424 } 425 426 /** Grows the table if table load exceeds the maximum allowed. */ 427 static inline void grow_if_needed(hash_table_t *h) 428 { 429 /* Grow the table if the average bucket load exceeds the maximum. */ 430 if (h->full_item_cnt < h->item_cnt) { 431 /* Keep the bucket_cnt odd (possibly also prime). */ 432 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1; 433 resize(h, new_bucket_cnt); 434 } 435 } 436 437 /** Allocates and rehashes items to a new table. Frees the old table. */ 438 static void resize(hash_table_t *h, size_t new_bucket_cnt) 439 { 440 assert(h && h->bucket); 441 assert(HT_MIN_BUCKETS <= new_bucket_cnt); 442 443 /* We are traversing the table and resizing would mess up the buckets. */ 444 if (h->apply_ongoing) 185 445 return; 186 } 187 188 /* 189 * Fewer keys were passed. 190 * Any partially matching entries are to be removed. 191 */ 192 hash_index_t chain; 193 for (chain = 0; chain < h->entries; chain++) { 194 for (link_t *cur = h->entry[chain].head.next; 195 cur != &h->entry[chain].head; 196 cur = cur->next) { 197 if (h->op->compare(key, keys, cur)) { 198 link_t *hlp; 199 200 hlp = cur; 201 cur = cur->prev; 202 203 list_remove(hlp); 204 h->op->remove_callback(hlp); 205 206 continue; 446 447 list_t *new_buckets; 448 449 /* Leave the table as is if we cannot resize. */ 450 if (!alloc_table(new_bucket_cnt, &new_buckets)) 451 return; 452 453 if (0 < h->item_cnt) { 454 /* Rehash all the items to the new table. 
*/ 455 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) { 456 list_foreach_safe(h->bucket[old_idx], cur, next) { 457 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link); 458 459 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt; 460 list_remove(cur); 461 list_append(cur, &new_buckets[new_idx]); 207 462 } 208 463 } 209 464 } 210 } 211 212 /** Apply function to all items in hash table. 213 * 214 * @param h Hash table. 215 * @param f Function to be applied. 216 * @param arg Argument to be passed to the function. 217 * 218 */ 219 void hash_table_apply(hash_table_t *h, void (*f)(link_t *, void *), void *arg) 220 { 221 for (hash_index_t bucket = 0; bucket < h->entries; bucket++) { 222 link_t *cur; 223 link_t *next; 224 225 for (cur = h->entry[bucket].head.next; cur != &h->entry[bucket].head; 226 cur = next) { 227 /* 228 * The next pointer must be stored prior to the functor 229 * call to allow using destructor as the functor (the 230 * free function could overwrite the cur->next pointer). 231 */ 232 next = cur->next; 233 f(cur, arg); 234 } 235 } 236 } 465 466 free(h->bucket); 467 h->bucket = new_buckets; 468 h->bucket_cnt = new_bucket_cnt; 469 h->full_item_cnt = h->max_load * h->bucket_cnt; 470 } 471 237 472 238 473 /** @}
Note:
See TracChangeset
for help on using the changeset viewer.
