source: mainline/uspace/lib/cpp/include/__bits/adt/hash_table.hpp@ 5ea9dd2

/*
 * Copyright (c) 2018 Jaroslav Jindrak
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef LIBCPP_BITS_ADT_HASH_TABLE
#define LIBCPP_BITS_ADT_HASH_TABLE

#include <__bits/adt/list_node.hpp>
#include <__bits/adt/key_extractors.hpp>
#include <__bits/adt/hash_table_iterators.hpp>
#include <__bits/adt/hash_table_policies.hpp>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <tuple>
#include <utility>

namespace std::aux
{
    /**
     * To save code, we implement one hash table for both
     * unordered_map and unordered_set. To do this, we create
     * one inner hash table that is oblivious to its held type
     * (it uses a key extractor to get the key from it) and two
     * proxies that use either the plain Key type or a pair of
     * a key and a value.
     * Additionally, we use policies to represent both the single
     * and the multi variants of these containers at once.
     * Note: I am aware the naming is very unimaginative here,
     * not my strong side :)
     */
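    /*
     * As a rough illustration only (the names below are hypothetical;
     * the real extractor and policy types live in key_extractors.hpp
     * and hash_table_policies.hpp), a map-like proxy would instantiate
     * the table roughly as:
     *
     *   using table_type = hash_table<
     *       pair<const Key, Value>, Key, some_pair_key_extractor,
     *       Hash, KeyEq, Alloc, size_t,
     *       Iterator, ConstIterator, LocalIterator, ConstLocalIterator,
     *       some_single_policy
     *   >;
     *
     * while a set-like proxy would use Key itself as the value type
     * (with an identity-style extractor) and a multi policy for the
     * multiset variant.
     */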

    template<
        class Value, class Key, class KeyExtractor,
        class Hasher, class KeyEq,
        class Alloc, class Size,
        class Iterator, class ConstIterator,
        class LocalIterator, class ConstLocalIterator,
        class Policy
    >
    class hash_table
    {
        public:
            using value_type = Value;
            using key_type = Key;
            using size_type = Size;
            using allocator_type = Alloc;
            using key_equal = KeyEq;
            using hasher = Hasher;
            using key_extract = KeyExtractor;

            using iterator = Iterator;
            using const_iterator = ConstIterator;
            using local_iterator = LocalIterator;
            using const_local_iterator = ConstLocalIterator;

            using node_type = list_node<value_type>;

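            /**
             * Describes where a new element should go, as returned by the
             * policy's find_insertion_spot: the destination bucket, an
             * existing node to append the new element to (if any), and a
             * size_type value whose exact meaning (presumably the bucket
             * index) is up to the policy.
             */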
            using place_type = tuple<
                hash_table_bucket<value_type, size_type>*,
                list_node<value_type>*, size_type
            >;

            hash_table(size_type buckets, float max_load_factor = 1.f)
                : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
                  bucket_count_{buckets}, size_{}, hasher_{}, key_eq_{},
                  key_extractor_{}, max_load_factor_{max_load_factor}
            { /* DUMMY BODY */ }

            hash_table(size_type buckets, const hasher& hf, const key_equal& eql,
                       float max_load_factor = 1.f)
                : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
                  bucket_count_{buckets}, size_{}, hasher_{hf}, key_eq_{eql},
                  key_extractor_{}, max_load_factor_{max_load_factor}
            { /* DUMMY BODY */ }

            hash_table(const hash_table& other)
                : hash_table{other.bucket_count_, other.hasher_, other.key_eq_,
                             other.max_load_factor_}
            {
                for (const auto& x: other)
                    insert(x);
            }

            hash_table(hash_table&& other)
                : table_{other.table_}, bucket_count_{other.bucket_count_},
                  size_{other.size_}, hasher_{move(other.hasher_)},
                  key_eq_{move(other.key_eq_)}, key_extractor_{move(other.key_extractor_)},
                  max_load_factor_{other.max_load_factor_}
            {
                other.table_ = nullptr;
                other.bucket_count_ = size_type{};
                other.size_ = size_type{};
                other.max_load_factor_ = 1.f;
            }

            hash_table& operator=(const hash_table& other)
            {
                hash_table tmp{other};
                tmp.swap(*this);

                return *this;
            }

            hash_table& operator=(hash_table&& other)
            {
                hash_table tmp{move(other)};
                tmp.swap(*this);

                return *this;
            }

            bool empty() const noexcept
            {
                return size_ == 0;
            }

            size_type size() const noexcept
            {
                return size_;
            }

            size_type max_size(allocator_type& alloc)
            {
                return allocator_traits<allocator_type>::max_size(alloc);
            }

            iterator begin() noexcept
            {
                auto idx = first_filled_bucket_();
                return iterator{
                    table_, idx, bucket_count_,
                    table_[idx].head
                };
            }

            const_iterator begin() const noexcept
            {
                return cbegin();
            }

            iterator end() noexcept
            {
                return iterator{};
            }

            const_iterator end() const noexcept
            {
                return cend();
            }

            const_iterator cbegin() const noexcept
            {
                auto idx = first_filled_bucket_();
                return const_iterator{
                    table_, idx, bucket_count_,
                    table_[idx].head
                };
            }

            const_iterator cend() const noexcept
            {
                return const_iterator{};
            }

            template<class... Args>
            auto emplace(Args&&... args)
            {
                return Policy::emplace(*this, forward<Args>(args)...);
            }

            auto insert(const value_type& val)
            {
                return Policy::insert(*this, val);
            }

            auto insert(value_type&& val)
            {
                return Policy::insert(*this, forward<value_type>(val));
            }

            size_type erase(const key_type& key)
            {
                return Policy::erase(*this, key);
            }

            iterator erase(const_iterator it)
            {
                if (it == cend())
                    return end();

                auto node = it.node();
                auto idx = it.idx();

                /**
                 * Note: This way we will continue on the next bucket
                 *       if this is the last element in its bucket.
                 */
                iterator res{table_, idx, bucket_count_, node};
                ++res;

                if (table_[idx].head == node)
                {
                    if (node->next != node)
                        table_[idx].head = node->next;
                    else
                        table_[idx].head = nullptr;
                }
                --size_;

                node->unlink();
                delete node;

                if (empty())
                    return end();
                else
                    return res;
            }

            void clear() noexcept
            {
                for (size_type i = 0; i < bucket_count_; ++i)
                    table_[i].clear();
                size_ = size_type{};
            }

            void swap(hash_table& other)
                noexcept(allocator_traits<allocator_type>::is_always_equal::value &&
                         noexcept(swap(declval<Hasher&>(), declval<Hasher&>())) &&
                         noexcept(swap(declval<KeyEq&>(), declval<KeyEq&>())))
            {
                std::swap(table_, other.table_);
                std::swap(bucket_count_, other.bucket_count_);
                std::swap(size_, other.size_);
                std::swap(hasher_, other.hasher_);
                std::swap(key_eq_, other.key_eq_);
                std::swap(max_load_factor_, other.max_load_factor_);
            }

            hasher hash_function() const
            {
                return hasher_;
            }

            key_equal key_eq() const
            {
                return key_eq_;
            }

            iterator find(const key_type& key)
            {
                auto idx = get_bucket_idx_(key);
                auto head = table_[idx].head;

                if (!head)
                    return end();

                auto current = head;
                do
                {
                    if (key_eq_(key, key_extractor_(current->value)))
                        return iterator{table_, idx, bucket_count_, current};
                    current = current->next;
                }
                while (current && current != head);

                return end();
            }

            const_iterator find(const key_type& key) const
            {
                auto idx = get_bucket_idx_(key);
                auto head = table_[idx].head;

                if (!head)
                    return end();

                auto current = head;
                do
                {
                    if (key_eq_(key, key_extractor_(current->value)))
                        return const_iterator{table_, idx, bucket_count_, current};
                    current = current->next;
                }
                while (current != head);

                return end();
            }

            size_type count(const key_type& key) const
            {
                return Policy::count(*this, key);
            }

            pair<iterator, iterator> equal_range(const key_type& key)
            {
                return Policy::equal_range(*this, key);
            }

            pair<const_iterator, const_iterator> equal_range(const key_type& key) const
            {
                return Policy::equal_range_const(*this, key);
            }

            size_type bucket_count() const noexcept
            {
                return bucket_count_;
            }

            size_type max_bucket_count() const noexcept
            {
                return numeric_limits<size_type>::max() /
                       sizeof(hash_table_bucket<value_type, size_type>);
            }

            size_type bucket_size(size_type n) const
            {
                return table_[n].size();
            }

            size_type bucket(const key_type& key) const
            {
                return get_bucket_idx_(key);
            }

            local_iterator begin(size_type n)
            {
                return local_iterator{table_[n].head, table_[n].head};
            }

            const_local_iterator begin(size_type n) const
            {
                return cbegin(n);
            }

            local_iterator end(size_type n)
            {
                return local_iterator{};
            }

            const_local_iterator end(size_type n) const
            {
                return cend(n);
            }

            const_local_iterator cbegin(size_type n) const
            {
                return const_local_iterator{table_[n].head, table_[n].head};
            }

            const_local_iterator cend(size_type n) const
            {
                return const_local_iterator{};
            }

            float load_factor() const noexcept
            {
                return size_ / static_cast<float>(bucket_count_);
            }

            float max_load_factor() const noexcept
            {
                return max_load_factor_;
            }

            void max_load_factor(float factor)
            {
                if (factor > 0.f)
                    max_load_factor_ = factor;

                rehash_if_needed();
            }

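            /**
             * Rehashing relinks the existing nodes into a newly allocated
             * bucket array instead of copying elements: a temporary table
             * is created, each node is unhooked from its old bucket and
             * appended to its new one, the two tables are swapped, and the
             * old bucket array (now owned by the temporary) is freed.
             */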
            void rehash(size_type count)
            {
                if (count < size_ / max_load_factor_)
                    count = size_ / max_load_factor_;

                /**
                 * Note: If an exception is thrown, there
                 *       is no effect. Since this is the only
                 *       place where an exception (no mem) can
                 *       be thrown and no changes to this have been
                 *       made, we're ok.
                 */
                hash_table new_table{count, max_load_factor_};

                for (std::size_t i = 0; i < bucket_count_; ++i)
                {
                    auto head = table_[i].head;
                    if (!head)
                        continue;

                    auto current = head;

                    do
                    {
                        auto next = current->next;

                        current->next = current;
                        current->prev = current;

                        auto where = Policy::find_insertion_spot(
                            new_table, key_extractor_(current->value)
                        );

                        /**
                         * Note: We're rehashing, so we know each
                         *       key can be inserted.
                         */
                        auto new_bucket = get<0>(where);
                        auto new_successor = get<1>(where);

                        if (new_successor)
                            new_successor->append(current);
                        else
                            new_bucket->append(current);

                        current = next;
                    } while (current != head);

                    table_[i].head = nullptr;
                }

                new_table.size_ = size_;
                swap(new_table);

                delete[] new_table.table_;
                new_table.table_ = nullptr;
            }

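            /**
             * reserve(count) requests enough buckets for count elements
             * without exceeding max_load_factor_, i.e. at least
             * count / max_load_factor_ buckets; the + 1 compensates for
             * the truncation to size_type. E.g. with a max load factor
             * of 1.0, reserve(100) rehashes to 101 buckets (or more if
             * the current size already requires it).
             */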
            void reserve(size_type count)
            {
                rehash(count / max_load_factor_ + 1);
            }

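            /**
             * Compares the contents of two tables irrespective of order:
             * for every run of equal keys in *this, the number of elements
             * with that key must match the count in the other table
             * (presumably the backing check for the unordered containers'
             * operator==).
             */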
            bool is_eq_to(const hash_table& other) const
            {
                if (size() != other.size())
                    return false;

                auto it = begin();
                while (it != end())
                {
                    /**
                     * For each key K we count how many elements
                     * with that key there are in this table and
                     * then check that the other table contains
                     * the same number of them.
                     */

                    size_type cnt{};
                    auto tmp = it;

                    while (key_eq_(key_extractor_(*it), key_extractor_(*tmp)))
                    {
                        ++cnt;
                        if (++tmp == end())
                            break;
                    }

                    auto other_cnt = other.count(key_extractor_(*it));
                    if (cnt != other_cnt)
                        return false;

                    it = tmp; // tmp is one past *it's key.
                }

                return true;
            }

            ~hash_table()
            {
                // Lists are deleted in ~hash_table_bucket.
                if (table_)
                    delete[] table_;
            }

            place_type find_insertion_spot(const key_type& key) const
            {
                return Policy::find_insertion_spot(*this, key);
            }

            place_type find_insertion_spot(key_type&& key) const
            {
                return Policy::find_insertion_spot(*this, key);
            }

            const key_type& get_key(const value_type& val) const
            {
                return key_extractor_(val);
            }

            bool keys_equal(const key_type& key, const value_type& val)
            {
                return key_eq_(key, key_extractor_(val));
            }

            bool keys_equal(const key_type& key, const value_type& val) const
            {
                return key_eq_(key, key_extractor_(val));
            }

            hash_table_bucket<value_type, size_type>* table()
            {
                return table_;
            }

            node_type* head(size_type idx)
            {
                if (idx < bucket_count_)
                    return table_[idx].head;
                else
                    return nullptr;
            }

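            /**
             * Grows the table whenever the load factor would exceed
             * max_load_factor_. E.g. with a max load factor of 1.0 and
             * 16 buckets, the 17th element triggers a rehash to
             * 16 * 1.25 = 20 buckets.
             */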
            void rehash_if_needed()
            {
                if (size_ > max_load_factor_ * bucket_count_)
                    rehash(bucket_count_ * bucket_count_growth_factor_);
            }

            void increment_size()
            {
                ++size_;

                rehash_if_needed();
            }

            void decrement_size()
            {
                --size_;
            }

        private:
            hash_table_bucket<value_type, size_type>* table_;
            size_type bucket_count_;
            size_type size_;
            hasher hasher_;
            key_equal key_eq_;
            key_extract key_extractor_;
            float max_load_factor_;

            static constexpr float bucket_count_growth_factor_{1.25};

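            /**
             * Maps a key to its bucket by reducing the hash value modulo
             * the current bucket count, e.g. a hash of 42 with 16 buckets
             * lands in bucket 42 % 16 == 10.
             */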
            size_type get_bucket_idx_(const key_type& key) const
            {
                return hasher_(key) % bucket_count_;
            }

            size_type first_filled_bucket_() const
            {
                size_type res{};
                while (res < bucket_count_)
                {
                    if (table_[res].head)
                        return res;
                    ++res;
                }

                /**
                 * Note: This is used for iterators,
                 *       so we need to return a valid index.
                 *       But since table_[0].head is nullptr
                 *       we know that if we return 0 the
                 *       created iterator will test as equal
                 *       to end().
                 */
                return 0;
            }

            friend Policy;
    };
}

#endif