source: mainline/uspace/lib/cpp/include/internal/hash_table.hpp@ ee8c5ec

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since ee8c5ec was 65a0d0c, checked in by Dzejrou <dzejrou@…>, 7 years ago

cpp: fixed bugs found by the uset tests

  • Property mode set to 100644
File size: 18.4 KB
RevLine 
[e29ce3d]1/*
2 * Copyright (c) 2018 Jaroslav Jindrak
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef LIBCPP_INTERNAL_HASH_TABLE
30#define LIBCPP_INTERNAL_HASH_TABLE
31
32#include <cstdlib>
33#include <internal/list.hpp>
[9751280]34#include <internal/key_extractors.hpp>
35#include <internal/hash_table_iterators.hpp>
36#include <internal/hash_table_policies.hpp>
[871cfe0c]37#include <iterator>
[db628a0]38#include <limits>
[e29ce3d]39#include <memory>
[871cfe0c]40#include <tuple>
[e29ce3d]41#include <utility>
42
43namespace std::aux
44{
45 /**
46 * To save code, we're going to implement one hash table
47 * for both unordered_map and unordered_set. To do this,
48 * we create one inner hash table that is oblivious to its
49 * held type (and uses a key extractor to get the key from it)
50 * and two proxies that either use plain Key type or a pair
51 * of a key and a value.
[9751280]52 * Additionally, we will use policies to represent both single
53 * and multi variants of these containers at once.
[e29ce3d]54 * Note: I am aware the naming is very unimaginative here,
55 * not my strong side :)
56 */
57
58 template<
59 class Value, class Key, class KeyExtractor,
60 class Hasher, class KeyEq,
61 class Alloc, class Size,
62 class Iterator, class ConstIterator,
[871cfe0c]63 class LocalIterator, class ConstLocalIterator,
[e29ce3d]64 class Policy
65 >
66 class hash_table
67 {
68 public:
69 using value_type = Value;
70 using key_type = Key;
71 using size_type = Size;
72 using allocator_type = Alloc;
73 using key_equal = KeyEq;
74 using hasher = Hasher;
75 using key_extract = KeyExtractor;
76
77 using iterator = Iterator;
78 using const_iterator = ConstIterator;
79 using local_iterator = LocalIterator;
80 using const_local_iterator = ConstLocalIterator;
81
[86b3ae98]82 using node_type = list_node<value_type>;
83
[947ad139]84 using place_type = tuple<
[871cfe0c]85 hash_table_bucket<value_type, size_type>*,
86 list_node<value_type>*, size_type
87 >;
88
89 hash_table(size_type buckets, float max_load_factor = 1.f)
[1d5424a]90 : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
[871cfe0c]91 bucket_count_{buckets}, size_{}, hasher_{}, key_eq_{},
92 key_extractor_{}, max_load_factor_{max_load_factor}
93 { /* DUMMY BODY */ }
[e29ce3d]94
[8ec1cd2]95 hash_table(size_type buckets, const hasher& hf, const key_equal& eql,
96 float max_load_factor = 1.f)
97 : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
98 bucket_count_{buckets}, size_{}, hasher_{hf}, key_eq_{eql},
99 key_extractor_{}, max_load_factor_{max_load_factor}
100 { /* DUMMY BODY */ }
101
[b9076db]102 hash_table(const hash_table& other)
103 : hash_table{other.bucket_count_, other.hasher_, other.key_eq_,
104 other.max_load_factor_}
105 {
106 for (const auto& x: other)
[b22ccaa]107 insert(x);
[b9076db]108 }
109
110 hash_table(hash_table&& other)
111 : table_{other.table_}, bucket_count_{other.bucket_count_},
112 size_{other.size_}, hasher_{move(other.hasher_)},
113 key_eq_{move(other.key_eq_)}, key_extractor_{move(other.key_extractor_)},
114 max_load_factor_{other.max_load_factor_}
115 {
116 other.table_ = nullptr;
117 other.bucket_count_ = size_type{};
118 other.size_ = size_type{};
119 other.max_load_factor_ = 1.f;
120 }
121
122 hash_table& operator=(const hash_table& other)
123 {
124 hash_table tmp{other};
125 tmp.swap(*this);
126
127 return *this;
128 }
129
130 hash_table& operator=(hash_table&& other)
131 {
132 hash_table tmp{move(other)};
133 tmp.swap(*this);
134
135 return *this;
136 }
[8ec1cd2]137
[e29ce3d]138 bool empty() const noexcept
139 {
140 return size_ == 0;
141 }
142
143 size_type size() const noexcept
144 {
145 return size_;
146 }
147
[275bdafb]148 size_type max_size(allocator_type& alloc)
[e29ce3d]149 {
[275bdafb]150 return allocator_traits<allocator_type>::max_size(alloc);
[e29ce3d]151 }
152
153 iterator begin() noexcept
154 {
[b22ccaa]155 auto idx = first_filled_bucket_();
[871cfe0c]156 return iterator{
[b22ccaa]157 table_, idx, bucket_count_,
158 table_[idx].head
[871cfe0c]159 };
[e29ce3d]160 }
161
162 const_iterator begin() const noexcept
163 {
[871cfe0c]164 return cbegin();
[e29ce3d]165 }
166
167 iterator end() noexcept
168 {
[871cfe0c]169 return iterator{};
[e29ce3d]170 }
171
172 const_iterator end() const noexcept
173 {
[871cfe0c]174 return cend();
[e29ce3d]175 }
176
177 const_iterator cbegin() const noexcept
178 {
[b22ccaa]179 auto idx = first_filled_bucket_();
[871cfe0c]180 return const_iterator{
[b22ccaa]181 table_, idx, bucket_count_,
182 table_[idx].head
[871cfe0c]183 };
[e29ce3d]184 }
185
186 const_iterator cend() const noexcept
187 {
[871cfe0c]188 return const_iterator{};
[e29ce3d]189 }
190
[492377a]191 template<class... Args>
[a2f01c4]192 auto emplace(Args&&... args)
[e29ce3d]193 {
[492377a]194 return Policy::emplace(*this, forward<Args>(args)...);
[e29ce3d]195 }
196
[a2f01c4]197 auto insert(const value_type& val)
[e29ce3d]198 {
[492377a]199 return Policy::insert(*this, val);
[871cfe0c]200 }
201
[a2f01c4]202 auto insert(value_type&& val)
[871cfe0c]203 {
[492377a]204 return Policy::insert(*this, forward<value_type>(val));
[e29ce3d]205 }
206
207 size_type erase(const key_type& key)
208 {
[e9027b5]209 return Policy::erase(*this, key);
[e29ce3d]210 }
211
212 iterator erase(const_iterator it)
213 {
[e9027b5]214 auto node = it.node();
215 auto idx = it.idx();
216
217 /**
218 * Note: This way we will continue on the next bucket
219 * if this is the last element in its bucket.
220 */
221 iterator res{table_, idx, size_, node};
222 ++res;
223
224 if (table_[idx].head == node)
225 {
226 if (node->next != node)
227 table_[idx].head = node->next;
228 else
229 table_[idx].head = nullptr;
230 }
[65a0d0c]231 --size_;
[e9027b5]232
233 node->unlink();
234 delete node;
235
236 return res;
[e29ce3d]237 }
238
239 void clear() noexcept
240 {
[7320ca6]241 for (size_type i = 0; i < bucket_count_; ++i)
242 table_[i].clear();
[871cfe0c]243 size_ = size_type{};
[e29ce3d]244 }
245
246 void swap(hash_table& other)
[1d5424a]247 noexcept(allocator_traits<allocator_type>::is_always_equal::value &&
[e29ce3d]248 noexcept(swap(declval<Hasher&>(), declval<Hasher&>())) &&
249 noexcept(swap(declval<KeyEq&>(), declval<KeyEq&>())))
250 {
[871cfe0c]251 std::swap(table_, other.table_);
252 std::swap(bucket_count_, other.bucket_count_);
253 std::swap(size_, other.size_);
254 std::swap(hasher_, other.hasher_);
255 std::swap(key_eq_, other.key_eq_);
256 std::swap(max_load_factor_, other.max_load_factor_);
[e29ce3d]257 }
258
259 hasher hash_function() const
260 {
261 return hasher_;
262 }
263
264 key_equal key_eq() const
265 {
266 return key_eq_;
267 }
268
269 iterator find(const key_type& key)
270 {
[1d5424a]271 auto idx = get_bucket_idx_(key);
272 auto head = table_[idx].head;
273
274 if (!head)
275 return end();
276
277 auto current = head;
278 do
279 {
280 if (key_eq_(key, key_extractor_(current->value)))
281 return iterator{table_, idx, size_, current};
282 current = current->next;
283 }
284 while (current != head);
285
286 return end();
[e29ce3d]287 }
288
289 const_iterator find(const key_type& key) const
290 {
[1d5424a]291 auto idx = get_bucket_idx_(key);
292 auto head = table_[idx].head;
293
294 if (!head)
295 return end();
296
297 auto current = head;
298 do
299 {
300 if (key_eq_(key, key_extractor_(current->value)))
301 return iterator{table_, idx, size_, current};
302 current = current->next;
303 }
304 while (current != head);
305
306 return end();
[e29ce3d]307 }
308
309 size_type count(const key_type& key) const
310 {
[871cfe0c]311 return Policy::count(*this, key);
[e29ce3d]312 }
313
314 pair<iterator, iterator> equal_range(const key_type& key)
315 {
[1d5424a]316 return Policy::equal_range(*this, key);
[e29ce3d]317 }
318
319 pair<const_iterator, const_iterator> equal_range(const key_type& key) const
320 {
[1d5424a]321 return Policy::equal_range_const(*this, key);
[e29ce3d]322 }
323
324 size_type bucket_count() const noexcept
325 {
326 return bucket_count_;
327 }
328
329 size_type max_bucket_count() const noexcept
330 {
[db628a0]331 return numeric_limits<size_type>::max() /
332 sizeof(hash_table_bucket<value_type, size_type>);
[e29ce3d]333 }
334
335 size_type bucket_size(size_type n) const
336 {
[871cfe0c]337 return table_[n].size();
[e29ce3d]338 }
339
340 size_type bucket(const key_type& key) const
341 {
[871cfe0c]342 return get_bucket_idx_(key);
[e29ce3d]343 }
344
345 local_iterator begin(size_type n)
346 {
[871cfe0c]347 return local_iterator{table_[n].head, table_[n].head};
[e29ce3d]348 }
349
350 const_local_iterator begin(size_type n) const
351 {
[871cfe0c]352 return cbegin(n);
[e29ce3d]353 }
354
[871cfe0c]355 local_iterator end(size_type n)
[e29ce3d]356 {
[871cfe0c]357 return local_iterator{};
358 }
359
360 const_local_iterator end(size_type n) const
361 {
362 return cend(n);
[e29ce3d]363 }
364
365 const_local_iterator cbegin(size_type n) const
366 {
[871cfe0c]367 return const_local_iterator{table_[n].head, table_[n].head};
[e29ce3d]368 }
369
370 const_local_iterator cend(size_type n) const
371 {
[871cfe0c]372 return const_local_iterator{};
[e29ce3d]373 }
374
375 float load_factor() const noexcept
376 {
377 return size_ / static_cast<float>(bucket_count_);
378 }
379
380 float max_load_factor() const noexcept
381 {
382 return max_load_factor_;
383 }
384
385 void max_load_factor(float factor)
386 {
[e912cdf]387 if (factor > 0.f)
388 max_load_factor_ = factor;
389
390 rehash_if_needed();
[e29ce3d]391 }
392
[1d5424a]393 void rehash(size_type count)
[e29ce3d]394 {
[1d5424a]395 if (count < size_ / max_load_factor_)
396 count = size_ / max_load_factor_;
397
[b9076db]398 /**
399 * Note: If an exception is thrown, there
400 * is no effect. Since this is the only
401 * place where an exception (no mem) can
402 * be thrown and no changes to this have been
403 * made, we're ok.
404 */
[1d5424a]405 hash_table new_table{count, max_load_factor_};
406
407 for (std::size_t i = 0; i < bucket_count_; ++i)
408 {
409 auto head = table_[i].head;
410 if (!head)
411 continue;
412
413 auto current = head;
414
415 do
416 {
417 auto next = current->next;
418
419 current->next = current;
420 current->prev = current;
421
422 auto where = Policy::find_insertion_spot(
423 new_table, key_extractor_(current->value)
424 );
425
426 /**
427 * Note: We're rehashing, so we know each
428 * key can be inserted.
429 */
430 auto new_bucket = get<0>(where);
431 auto new_successor = get<1>(where);
432
433 if (new_successor)
434 new_successor->append(current);
435 else
436 new_bucket->append(current);
437
438 current = next;
439 } while (current != head);
440
441 table_[i].head = nullptr;
442 }
443
444 new_table.size_ = size_;
445 swap(new_table);
446
447 delete[] new_table.table_;
448 new_table.table_ = nullptr;
[e29ce3d]449 }
450
[1d5424a]451 void reserve(size_type count)
[e29ce3d]452 {
[1d5424a]453 rehash(count / max_load_factor_ + 1);
[e29ce3d]454 }
455
[947ad139]456 bool is_eq_to(const hash_table& other) const
[e29ce3d]457 {
[b22ccaa]458 if (size() != other.size())
459 return false;
460
461 auto it = begin();
462 while (it != end())
463 {
464 /**
465 * For each key K we will check how many
466 * instances of K are there in the table.
467 * Then we will check if the count for K
468 * is equal to that amount.
469 */
470
471 size_type cnt{};
472 auto tmp = it;
473
474 while (key_eq_(key_extractor_(*it), key_extractor_(*tmp)))
475 {
476 ++cnt;
477 if (++tmp == end())
478 break;
479 }
480
481 auto other_cnt = other.count(key_extractor_(*it));
482 if (cnt != other_cnt)
483 return false;
484
485 it = tmp; // tmp is one past *it's key.
486 }
487
488 return true;
[e29ce3d]489 }
490
491 ~hash_table()
492 {
493 // Lists are deleted in ~hash_table_bucket.
[1d5424a]494 if (table_)
495 delete[] table_;
[e29ce3d]496 }
497
[947ad139]498 place_type find_insertion_spot(const key_type& key) const
[871cfe0c]499 {
500 return Policy::find_insertion_spot(*this, key);
501 }
502
[947ad139]503 place_type find_insertion_spot(key_type&& key) const
[871cfe0c]504 {
505 return Policy::find_insertion_spot(*this, key);
506 }
507
[947ad139]508 const key_type& get_key(const value_type& val) const
[875788a8]509 {
510 return key_extractor_(val);
511 }
512
[86b3ae98]513 bool keys_equal(const key_type& key, const value_type& val)
[8ec1cd2]514 {
[86b3ae98]515 return key_eq_(key, key_extractor_(val));
[8ec1cd2]516 }
517
[99bf4c4]518 bool keys_equal(const key_type& key, const value_type& val) const
519 {
520 return key_eq_(key, key_extractor_(val));
521 }
522
[86b3ae98]523 hash_table_bucket<value_type, size_type>* table()
[8ec1cd2]524 {
[86b3ae98]525 return table_;
[8ec1cd2]526 }
527
[86b3ae98]528 hash_table_bucket<value_type, size_type>* head(size_type idx)
529 {
530 if (idx < bucket_count_)
[947ad139]531 return table_[idx]->head;
[86b3ae98]532 else
533 return nullptr;
534 }
535
[3be3752]536 void rehash_if_needed()
537 {
538 if (size_ > max_load_factor_ * bucket_count_)
539 rehash(bucket_count_ * bucket_count_growth_factor_);
540 }
541
542 void increment_size()
543 {
544 ++size_;
[e912cdf]545
546 rehash_if_needed();
547 }
548
549 void decrement_size()
550 {
551 --size_;
552 }
553
[86b3ae98]554 private:
[e29ce3d]555 hash_table_bucket<value_type, size_type>* table_;
556 size_type bucket_count_;
557 size_type size_;
558 hasher hasher_;
559 key_equal key_eq_;
[871cfe0c]560 key_extract key_extractor_;
[e29ce3d]561 float max_load_factor_;
562
[3be3752]563 static constexpr float bucket_count_growth_factor_{1.25};
564
[871cfe0c]565 size_type get_bucket_idx_(const key_type& key) const
566 {
567 return hasher_(key) % bucket_count_;
568 }
[e29ce3d]569
[b22ccaa]570 size_type first_filled_bucket_() const
571 {
572 size_type res{};
573 while (res < bucket_count_)
574 {
575 if (table_[res].head)
576 return res;
577 ++res;
578 }
579
580 /**
581 * Note: This is used for iterators,
582 * so we need to return a valid index.
583 * But since table_[0].head is nullptr
584 * we know that if we return 0 the
585 * created iterator will test as equal
586 * to end().
587 */
588 return 0;
589 }
590
[871cfe0c]591 friend Policy;
[e29ce3d]592 };
[871cfe0c]593}
[e29ce3d]594
595#endif
Note: See TracBrowser for help on using the repository browser.