source: mainline/common/include/adt/hash.h@ 0db0df2

Last change on this file since 0db0df2 was 0db0df2, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 months ago

Hash table improvements

Implement hash_table_foreach macro, analogous to list_foreach.

Remove superfluous argument to hash_table_find_next().
(If the user needs to recheck the part of the list already
checked by hash_table_find(), they can just rerun that function.)

Add hash argument to hash_table_ops_t::key_equal.
The big change here is that users with big keys can store the hash
value alongside key in their entries, and for the low low cost of
sizeof(size_t) bytes eliminate a bunch of expensive key comparisons.

Also added a hash function for strings and arbitrary data.
Found this one by asking ChatGPT, because the latency of accesses
to my book collection is currently a couple of hours.

+ Some drive-by unused #include removal.

  • Property mode set to 100644
File size: 4.2 KB
Line 
1/*
2 * Copyright (c) 2012 Adam Hraska
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup libc
30 * @{
31 */
32/** @file
33 */
34#ifndef _LIBC_ADT_HASH_H_
35#define _LIBC_ADT_HASH_H_
36
37#include <stddef.h>
38#include <stdint.h>
39
/**
 * Mix a 32-bit value so that a skewed input affects all output bits.
 *
 * @param hash Value to mix (e.g. a hash with poorly distributed bits).
 * @return The mixed 32-bit value.
 */
static inline uint32_t hash_mix32(uint32_t hash)
{
	/*
	 * Thomas Wang's modification of Bob Jenkins' hash mixing function:
	 * http://www.concentric.net/~Ttwang/tech/inthash.htm
	 * Public domain.
	 */
	uint32_t h = hash;

	h = (h << 15) + ~h;
	h ^= h >> 12;
	h += h << 2;
	h ^= h >> 4;
	h *= 2057;
	h ^= h >> 16;

	return h;
}
56
/**
 * Mix a 64-bit value so that a skewed input affects all output bits.
 *
 * @param hash Value to mix (e.g. a hash with poorly distributed bits).
 * @return The mixed 64-bit value, with its 32-bit halves exchanged.
 */
static inline uint64_t hash_mix64(uint64_t hash)
{
	/*
	 * Thomas Wang's public domain 64-bit hash mixing function:
	 * http://www.concentric.net/~Ttwang/tech/inthash.htm
	 *
	 * NOTE(review): the multiplier below is only 32 bits wide; confirm
	 * against the referenced page that these are the intended steps
	 * for 64-bit input.
	 */
	uint64_t h = hash;

	h = h ^ 61 ^ (h >> 16);
	h += h << 3;
	h ^= h >> 4;
	h *= 0x27d4eb2d;
	h ^= h >> 15;

	/*
	 * Lower order bits are mixed more thoroughly. Swap them with
	 * the higher order bits and make the resulting higher order bits
	 * more usable.
	 */
	const uint64_t former_low = h << 32;
	const uint64_t former_high = h >> 32;

	return former_low | former_high;
}
76
/**
 * Produces a uniform hash affecting all output bits from the skewed input.
 *
 * Forwards to hash_mix32() or hash_mix64() depending on the width of
 * size_t, so the whole argument takes part in the mixing.
 *
 * @param hash Value to mix.
 * @return The mixed value.
 */
static inline size_t hash_mix(size_t hash)
{
	/*
	 * Select the variant by the width of size_t itself, not of long:
	 * on data models where long is narrower than size_t (e.g. LLP64),
	 * a test on sizeof(long) would truncate the input to 32 bits and
	 * leave the upper half of the result always zero.
	 */
	if (sizeof(size_t) <= sizeof(uint32_t))
		return hash_mix32((uint32_t) hash);

	return hash_mix64((uint64_t) hash);
}
85
/** Fold another value into an accumulated hash.
 *
 * Call repeatedly to build one hash out of several values.
 *
 * Typical usage:
 * @code
 * int car_id;
 * bool car_convertible;
 * // ..
 * size_t hash = 0;
 * hash = hash_combine(hash, car_id);
 * hash = hash_combine(hash, car_convertible);
 * // Now use hash as a hash of both car_id and car_convertible.
 * @endcode
 *
 * @param seed Hash accumulated so far.
 * @param hash Value to fold into the seed.
 * @return The combined hash.
 */
static inline size_t hash_combine(size_t seed, size_t hash)
{
	/*
	 * todo: use Bob Jenkins' proper mixing hash pass:
	 * http://burtleburtle.net/bob/c/lookup3.c
	 */
	const size_t width = sizeof(size_t) * 8;

	/* Seed rotated left by five bit positions. */
	const size_t rotated = (seed << 5) | (seed >> (width - 5));

	return seed ^ (hash + 0x9e3779b9 + rotated);
}
109
/** Compute a hash of a NUL-terminated string.
 *
 * The algorithm may change in the future, so never use it for hashes
 * that will be stored to a file or sent over a network.
 *
 * Characters are read as plain char, so bytes with the top bit set
 * contribute according to the platform's char signedness.
 *
 * @param str NUL-terminated string to hash.
 * @return Hash of the characters preceding the terminator.
 */
static inline size_t hash_string(const char *str)
{
	/* djb2 hash + extra mixing at the end */

	size_t acc = 5381;

	for (const char *p = str; *p != '\0'; p++) {
		/* Multiplying by 33 is the same as (acc << 5) + acc. */
		acc = acc * 33 + *p;
	}

	return hash_mix(acc);
}
126
/** Compute a hash of a byte sequence of arbitrary length.
 *
 * The algorithm may change in the future, so never use it for hashes
 * that will be stored to a file or sent over a network.
 *
 * Bytes are read as plain char, so bytes with the top bit set
 * contribute according to the platform's char signedness.
 *
 * @param b   Start of the data to hash.
 * @param len Number of bytes to hash; nothing is read when it is 0.
 * @return Hash of the len bytes starting at b.
 */
static inline size_t hash_bytes(const void *b, size_t len)
{
	/* djb2 hash + extra mixing at the end */

	// TODO: work in bigger chunks for faster hashing

	const char *cursor = b;
	size_t acc = 5381;

	for (size_t remaining = len; remaining > 0; remaining--) {
		/* Multiplying by 33 is the same as (acc << 5) + acc. */
		acc = acc * 33 + *cursor++;
	}

	return hash_mix(acc);
}
146
147#endif
Note: See TracBrowser for help on using the repository browser.