Changeset 0db0df2 in mainline for common/include


Ignore:
Timestamp:
2025-04-07T17:53:53Z (3 months ago)
Author:
Jiří Zárevúcky <zarevucky.jiri@…>
Branches:
master
Children:
45226285, 93de384
Parents:
8f8818ac
git-author:
Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-07 16:41:57)
git-committer:
Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-07 17:53:53)
Message:

Hash table improvements

Implement hash_table_foreach macro, analogous to list_foreach.

Remove superfluous argument to hash_table_find_next().
(If the user needs to recheck the part of the list already
checked by hash_table_find(), they can just rerun that function.)

Add hash argument to hash_table_ops_t::key_equal.
The big change here is that users with big keys can store the hash
value alongside key in their entries, and for the low low cost of
sizeof(size_t) bytes eliminate a bunch of expensive key comparisons.

Also added a hash function for strings and arbitrary data.
Found this one by asking ChatGPT, because the latency of accesses
to my book collection is currently a couple of hours.

+ Some drive-by unused #include removal.

Location:
common/include/adt
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • common/include/adt/hash.h

    r8f8818ac r0db0df2  
    108108}
    109109
     110/** Hash a NUL-terminated string.
     111 * The algorithm may change in the future, so never use it for hashes
     112 * that will be stored to a file or sent over a network.
     113 */
     114static inline size_t hash_string(const char *str)
     115{
     116        /* djb2 hash + extra mixing at the end */
     117
     118        char c;
     119        size_t hash = 5381;
     120
     121        while ((c = *(str++)))
     122                hash = (hash << 5) + hash + c;
     123
     124        return hash_mix(hash);
     125}
     126
     127/** Hash an arbitrarily sized sequence of bytes.
     128 * The algorithm may change in the future, so never use it for hashes
     129 * that will be stored to a file or sent over a network.
     130 */
     131static inline size_t hash_bytes(const void *b, size_t len)
     132{
     133        /* djb2 hash + extra mixing at the end */
     134
     135        // TODO: work in bigger chunks for faster hashing
     136
     137        const char *str = b;
     138
     139        size_t hash = 5381;
     140
     141        for (size_t i = 0; i < len; i++)
     142                hash = (hash << 5) + hash + str[i];
     143
     144        return hash_mix(hash);
     145}
     146
    110147#endif
  • common/include/adt/hash_table.h

    r8f8818ac r0db0df2  
    6060
    6161        /** Returns true if the key is equal to the item's lookup key. */
    62         bool (*key_equal)(const void *key, const ht_link_t *item);
     62        bool (*key_equal)(const void *key, size_t hash, const ht_link_t *item);
    6363
    6464        /** Hash table item removal callback.
     
    8585        member_to_inst((item), type, member)
    8686
     87#define hash_table_foreach(ht, key, member, itype, iterator) \
     88        for (itype *iterator = NULL; \
     89            iterator == NULL; iterator = (itype *) INTPTR_MAX) \
     90                for (ht_link_t *__link = hash_table_find((ht), (key)); \
     91                    __link != NULL && ((iterator = member_to_inst(__link, itype, member))); \
     92                        __link = hash_table_find_next((ht), __link))
     93
    8794extern bool hash_table_create(hash_table_t *, size_t, size_t,
    8895    const hash_table_ops_t *);
     
    96103extern bool hash_table_insert_unique(hash_table_t *, ht_link_t *);
    97104extern ht_link_t *hash_table_find(const hash_table_t *, const void *);
    98 extern ht_link_t *hash_table_find_next(const hash_table_t *, ht_link_t *,
    99     ht_link_t *);
     105extern ht_link_t *hash_table_find_next(const hash_table_t *, ht_link_t *);
    100106extern size_t hash_table_remove(hash_table_t *, const void *);
    101107extern void hash_table_remove_item(hash_table_t *, ht_link_t *);
Note: See TracChangeset for help on using the changeset viewer.