Changeset 0db0df2 in mainline for common


Ignore:
Timestamp:
2025-04-07T17:53:53Z (3 months ago)
Author:
Jiří Zárevúcky <zarevucky.jiri@…>
Branches:
master
Children:
45226285, 93de384
Parents:
8f8818ac
git-author:
Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-07 16:41:57)
git-committer:
Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-07 17:53:53)
Message:

Hash table improvements

Implement hash_table_foreach macro, analogous to list_foreach.

Remove superfluous argument to hash_table_find_next().
(If the user needs to recheck the part of the list already
checked by hash_table_find(), they can just rerun that function.)

Add hash argument to hash_table_ops_t::key_equal.
The big change here is that users with big keys can store the hash
value alongside key in their entries, and for the low low cost of
sizeof(size_t) bytes eliminate a bunch of expensive key comparisons.

Also added a hash function for strings and arbitrary data.
Found this one by asking ChatGPT, because the latency of accesses
to my book collection is currently a couple of hours.

+ Some drive-by unused #include removal.

Location:
common
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • common/adt/hash_table.c

    r8f8818ac r0db0df2  
    247247        assert(h && h->bucket);
    248248
    249         size_t idx = h->op->key_hash(key) % h->bucket_cnt;
     249        size_t hash = h->op->key_hash(key);
     250        size_t idx = hash % h->bucket_cnt;
    250251
    251252        list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
    252                 /*
    253                  * Is this is the item we are looking for? We could have first
    254                  * checked if the hashes match but op->key_equal() may very well be
    255                  * just as fast as op->hash().
    256                  */
    257                 if (h->op->key_equal(key, cur_link)) {
     253                if (h->op->key_equal(key, hash, cur_link))
    258254                        return cur_link;
    259                 }
    260255        }
    261256
     
    265260/** Find the next item equal to item. */
    266261ht_link_t *
    267 hash_table_find_next(const hash_table_t *h, ht_link_t *first, ht_link_t *item)
     262hash_table_find_next(const hash_table_t *h, ht_link_t *item)
    268263{
    269264        assert(item);
     
    271266
    272267        size_t idx = h->op->hash(item) % h->bucket_cnt;
    273 
    274         /* Traverse the circular list until we reach the starting item again. */
    275         for (link_t *cur = item->link.next; cur != &first->link;
    276             cur = cur->next) {
    277                 assert(cur);
    278 
    279                 if (cur == &h->bucket[idx].head)
    280                         continue;
    281 
     268        list_t *list = &h->bucket[idx];
     269        link_t *cur = list_next(&item->link, list);
     270
     271        /* Traverse the list until we reach its end. */
     272        for (; cur != NULL; cur = list_next(cur, list)) {
    282273                ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    283                 /*
    284                  * Is this is the item we are looking for? We could have first
    285                  * checked if the hashes match but op->equal() may very well be
    286                  * just as fast as op->hash().
    287                  */
    288                 if (h->op->equal(cur_link, item)) {
     274
     275                if (h->op->equal(cur_link, item))
    289276                        return cur_link;
    290                 }
    291277        }
    292278
     
    309295        assert(!h->apply_ongoing);
    310296
    311         size_t idx = h->op->key_hash(key) % h->bucket_cnt;
     297        size_t hash = h->op->key_hash(key);
     298        size_t idx = hash % h->bucket_cnt;
    312299
    313300        size_t removed = 0;
     
    316303                ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    317304
    318                 if (h->op->key_equal(key, cur_link)) {
     305                if (h->op->key_equal(key, hash, cur_link)) {
    319306                        ++removed;
    320307                        list_remove(cur);
  • common/include/adt/hash.h

    r8f8818ac r0db0df2  
    108108}
    109109
     110/** Hash a NUL-terminated string.
     111 * The algorithm may change in the future, so never use it for hashes
     112 * that will be stored to a file or sent over a network.
     113 */
     114static inline size_t hash_string(const char *str)
     115{
     116        /* djb2 hash + extra mixing at the end */
     117
     118        char c;
     119        size_t hash = 5381;
     120
     121        while ((c = *(str++)))
     122                hash = (hash << 5) + hash + c;
     123
     124        return hash_mix(hash);
     125}
     126
     127/** Hash an arbitrarily sized sequence of bytes.
     128 * The algorithm may change in the future, so never use it for hashes
     129 * that will be stored to a file or sent over a network.
     130 */
     131static inline size_t hash_bytes(const void *b, size_t len)
     132{
     133        /* djb2 hash + extra mixing at the end */
     134
     135        // TODO: work in bigger chunks for faster hashing
     136
     137        const char *str = b;
     138
     139        size_t hash = 5381;
     140
     141        for (size_t i = 0; i < len; i++)
     142                hash = (hash << 5) + hash + str[i];
     143
     144        return hash_mix(hash);
     145}
     146
    110147#endif
  • common/include/adt/hash_table.h

    r8f8818ac r0db0df2  
    6060
    6161        /** Returns true if the key is equal to the item's lookup key. */
    62         bool (*key_equal)(const void *key, const ht_link_t *item);
     62        bool (*key_equal)(const void *key, size_t hash, const ht_link_t *item);
    6363
    6464        /** Hash table item removal callback.
     
    8585        member_to_inst((item), type, member)
    8686
     87#define hash_table_foreach(ht, key, member, itype, iterator) \
     88        for (itype *iterator = NULL; \
     89            iterator == NULL; iterator = (itype *) INTPTR_MAX) \
     90                for (ht_link_t *__link = hash_table_find((ht), (key)); \
     91                    __link != NULL && ((iterator = member_to_inst(__link, itype, member))); \
     92                        __link = hash_table_find_next((ht), __link))
     93
    8794extern bool hash_table_create(hash_table_t *, size_t, size_t,
    8895    const hash_table_ops_t *);
     
    96103extern bool hash_table_insert_unique(hash_table_t *, ht_link_t *);
    97104extern ht_link_t *hash_table_find(const hash_table_t *, const void *);
    98 extern ht_link_t *hash_table_find_next(const hash_table_t *, ht_link_t *,
    99     ht_link_t *);
     105extern ht_link_t *hash_table_find_next(const hash_table_t *, ht_link_t *);
    100106extern size_t hash_table_remove(hash_table_t *, const void *);
    101107extern void hash_table_remove_item(hash_table_t *, ht_link_t *);
Note: See TracChangeset for help on using the changeset viewer.