source: mainline/uspace/lib/cpp/include/__bits/hash_table.hpp@ 8a8a9273

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 8a8a9273 was 8a8a9273, checked in by Dzejrou <dzejrou@…>, 7 years ago

cpp: renamed bits/string.hpp and bits/list.hpp to avoid future name clashes

  • Property mode set to 100644
File size: 18.4 KB
Line 
1/*
2 * Copyright (c) 2018 Jaroslav Jindrak
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef LIBCPP_BITS_HASH_TABLE
30#define LIBCPP_BITS_HASH_TABLE
31
32#include <cstdlib>
33#include <__bits/list_node.hpp>
34#include <__bits/key_extractors.hpp>
35#include <__bits/hash_table_iterators.hpp>
36#include <__bits/hash_table_policies.hpp>
37#include <iterator>
38#include <limits>
39#include <memory>
40#include <tuple>
41#include <utility>
42
43namespace std::aux
44{
45 /**
46 * To save code, we're going to implement one hash table
47 * for both unordered_map and unordered_set. To do this,
48 * we create one inner hash table that is oblivious to its
49 * held type (and uses a key extractor to get the key from it)
50 * and two proxies that either use plain Key type or a pair
51 * of a key and a value.
52 * Additionally, we will use policies to represent both single
53 * and multi variants of these containers at once.
54 * Note: I am aware the naming is very unimaginative here,
55 * not my strong side :)
56 */
57
58 template<
59 class Value, class Key, class KeyExtractor,
60 class Hasher, class KeyEq,
61 class Alloc, class Size,
62 class Iterator, class ConstIterator,
63 class LocalIterator, class ConstLocalIterator,
64 class Policy
65 >
66 class hash_table
67 {
68 public:
69 using value_type = Value;
70 using key_type = Key;
71 using size_type = Size;
72 using allocator_type = Alloc;
73 using key_equal = KeyEq;
74 using hasher = Hasher;
75 using key_extract = KeyExtractor;
76
77 using iterator = Iterator;
78 using const_iterator = ConstIterator;
79 using local_iterator = LocalIterator;
80 using const_local_iterator = ConstLocalIterator;
81
82 using node_type = list_node<value_type>;
83
84 using place_type = tuple<
85 hash_table_bucket<value_type, size_type>*,
86 list_node<value_type>*, size_type
87 >;
88
89 hash_table(size_type buckets, float max_load_factor = 1.f)
90 : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
91 bucket_count_{buckets}, size_{}, hasher_{}, key_eq_{},
92 key_extractor_{}, max_load_factor_{max_load_factor}
93 { /* DUMMY BODY */ }
94
95 hash_table(size_type buckets, const hasher& hf, const key_equal& eql,
96 float max_load_factor = 1.f)
97 : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
98 bucket_count_{buckets}, size_{}, hasher_{hf}, key_eq_{eql},
99 key_extractor_{}, max_load_factor_{max_load_factor}
100 { /* DUMMY BODY */ }
101
102 hash_table(const hash_table& other)
103 : hash_table{other.bucket_count_, other.hasher_, other.key_eq_,
104 other.max_load_factor_}
105 {
106 for (const auto& x: other)
107 insert(x);
108 }
109
110 hash_table(hash_table&& other)
111 : table_{other.table_}, bucket_count_{other.bucket_count_},
112 size_{other.size_}, hasher_{move(other.hasher_)},
113 key_eq_{move(other.key_eq_)}, key_extractor_{move(other.key_extractor_)},
114 max_load_factor_{other.max_load_factor_}
115 {
116 other.table_ = nullptr;
117 other.bucket_count_ = size_type{};
118 other.size_ = size_type{};
119 other.max_load_factor_ = 1.f;
120 }
121
122 hash_table& operator=(const hash_table& other)
123 {
124 hash_table tmp{other};
125 tmp.swap(*this);
126
127 return *this;
128 }
129
130 hash_table& operator=(hash_table&& other)
131 {
132 hash_table tmp{move(other)};
133 tmp.swap(*this);
134
135 return *this;
136 }
137
138 bool empty() const noexcept
139 {
140 return size_ == 0;
141 }
142
143 size_type size() const noexcept
144 {
145 return size_;
146 }
147
148 size_type max_size(allocator_type& alloc)
149 {
150 return allocator_traits<allocator_type>::max_size(alloc);
151 }
152
153 iterator begin() noexcept
154 {
155 auto idx = first_filled_bucket_();
156 return iterator{
157 table_, idx, bucket_count_,
158 table_[idx].head
159 };
160 }
161
162 const_iterator begin() const noexcept
163 {
164 return cbegin();
165 }
166
167 iterator end() noexcept
168 {
169 return iterator{};
170 }
171
172 const_iterator end() const noexcept
173 {
174 return cend();
175 }
176
177 const_iterator cbegin() const noexcept
178 {
179 auto idx = first_filled_bucket_();
180 return const_iterator{
181 table_, idx, bucket_count_,
182 table_[idx].head
183 };
184 }
185
186 const_iterator cend() const noexcept
187 {
188 return const_iterator{};
189 }
190
191 template<class... Args>
192 auto emplace(Args&&... args)
193 {
194 return Policy::emplace(*this, forward<Args>(args)...);
195 }
196
197 auto insert(const value_type& val)
198 {
199 return Policy::insert(*this, val);
200 }
201
202 auto insert(value_type&& val)
203 {
204 return Policy::insert(*this, forward<value_type>(val));
205 }
206
207 size_type erase(const key_type& key)
208 {
209 return Policy::erase(*this, key);
210 }
211
212 iterator erase(const_iterator it)
213 {
214 auto node = it.node();
215 auto idx = it.idx();
216
217 /**
218 * Note: This way we will continue on the next bucket
219 * if this is the last element in its bucket.
220 */
221 iterator res{table_, idx, size_, node};
222 ++res;
223
224 if (table_[idx].head == node)
225 {
226 if (node->next != node)
227 table_[idx].head = node->next;
228 else
229 table_[idx].head = nullptr;
230 }
231 --size_;
232
233 node->unlink();
234 delete node;
235
236 return res;
237 }
238
239 void clear() noexcept
240 {
241 for (size_type i = 0; i < bucket_count_; ++i)
242 table_[i].clear();
243 size_ = size_type{};
244 }
245
246 void swap(hash_table& other)
247 noexcept(allocator_traits<allocator_type>::is_always_equal::value &&
248 noexcept(swap(declval<Hasher&>(), declval<Hasher&>())) &&
249 noexcept(swap(declval<KeyEq&>(), declval<KeyEq&>())))
250 {
251 std::swap(table_, other.table_);
252 std::swap(bucket_count_, other.bucket_count_);
253 std::swap(size_, other.size_);
254 std::swap(hasher_, other.hasher_);
255 std::swap(key_eq_, other.key_eq_);
256 std::swap(max_load_factor_, other.max_load_factor_);
257 }
258
259 hasher hash_function() const
260 {
261 return hasher_;
262 }
263
264 key_equal key_eq() const
265 {
266 return key_eq_;
267 }
268
269 iterator find(const key_type& key)
270 {
271 auto idx = get_bucket_idx_(key);
272 auto head = table_[idx].head;
273
274 if (!head)
275 return end();
276
277 auto current = head;
278 do
279 {
280 if (key_eq_(key, key_extractor_(current->value)))
281 return iterator{table_, idx, size_, current};
282 current = current->next;
283 }
284 while (current != head);
285
286 return end();
287 }
288
289 const_iterator find(const key_type& key) const
290 {
291 auto idx = get_bucket_idx_(key);
292 auto head = table_[idx].head;
293
294 if (!head)
295 return end();
296
297 auto current = head;
298 do
299 {
300 if (key_eq_(key, key_extractor_(current->value)))
301 return iterator{table_, idx, size_, current};
302 current = current->next;
303 }
304 while (current != head);
305
306 return end();
307 }
308
309 size_type count(const key_type& key) const
310 {
311 return Policy::count(*this, key);
312 }
313
314 pair<iterator, iterator> equal_range(const key_type& key)
315 {
316 return Policy::equal_range(*this, key);
317 }
318
319 pair<const_iterator, const_iterator> equal_range(const key_type& key) const
320 {
321 return Policy::equal_range_const(*this, key);
322 }
323
324 size_type bucket_count() const noexcept
325 {
326 return bucket_count_;
327 }
328
329 size_type max_bucket_count() const noexcept
330 {
331 return numeric_limits<size_type>::max() /
332 sizeof(hash_table_bucket<value_type, size_type>);
333 }
334
335 size_type bucket_size(size_type n) const
336 {
337 return table_[n].size();
338 }
339
340 size_type bucket(const key_type& key) const
341 {
342 return get_bucket_idx_(key);
343 }
344
345 local_iterator begin(size_type n)
346 {
347 return local_iterator{table_[n].head, table_[n].head};
348 }
349
350 const_local_iterator begin(size_type n) const
351 {
352 return cbegin(n);
353 }
354
355 local_iterator end(size_type n)
356 {
357 return local_iterator{};
358 }
359
360 const_local_iterator end(size_type n) const
361 {
362 return cend(n);
363 }
364
365 const_local_iterator cbegin(size_type n) const
366 {
367 return const_local_iterator{table_[n].head, table_[n].head};
368 }
369
370 const_local_iterator cend(size_type n) const
371 {
372 return const_local_iterator{};
373 }
374
375 float load_factor() const noexcept
376 {
377 return size_ / static_cast<float>(bucket_count_);
378 }
379
380 float max_load_factor() const noexcept
381 {
382 return max_load_factor_;
383 }
384
385 void max_load_factor(float factor)
386 {
387 if (factor > 0.f)
388 max_load_factor_ = factor;
389
390 rehash_if_needed();
391 }
392
393 void rehash(size_type count)
394 {
395 if (count < size_ / max_load_factor_)
396 count = size_ / max_load_factor_;
397
398 /**
399 * Note: If an exception is thrown, there
400 * is no effect. Since this is the only
401 * place where an exception (no mem) can
402 * be thrown and no changes to this have been
403 * made, we're ok.
404 */
405 hash_table new_table{count, max_load_factor_};
406
407 for (std::size_t i = 0; i < bucket_count_; ++i)
408 {
409 auto head = table_[i].head;
410 if (!head)
411 continue;
412
413 auto current = head;
414
415 do
416 {
417 auto next = current->next;
418
419 current->next = current;
420 current->prev = current;
421
422 auto where = Policy::find_insertion_spot(
423 new_table, key_extractor_(current->value)
424 );
425
426 /**
427 * Note: We're rehashing, so we know each
428 * key can be inserted.
429 */
430 auto new_bucket = get<0>(where);
431 auto new_successor = get<1>(where);
432
433 if (new_successor)
434 new_successor->append(current);
435 else
436 new_bucket->append(current);
437
438 current = next;
439 } while (current != head);
440
441 table_[i].head = nullptr;
442 }
443
444 new_table.size_ = size_;
445 swap(new_table);
446
447 delete[] new_table.table_;
448 new_table.table_ = nullptr;
449 }
450
451 void reserve(size_type count)
452 {
453 rehash(count / max_load_factor_ + 1);
454 }
455
456 bool is_eq_to(const hash_table& other) const
457 {
458 if (size() != other.size())
459 return false;
460
461 auto it = begin();
462 while (it != end())
463 {
464 /**
465 * For each key K we will check how many
466 * instances of K are there in the table.
467 * Then we will check if the count for K
468 * is equal to that amount.
469 */
470
471 size_type cnt{};
472 auto tmp = it;
473
474 while (key_eq_(key_extractor_(*it), key_extractor_(*tmp)))
475 {
476 ++cnt;
477 if (++tmp == end())
478 break;
479 }
480
481 auto other_cnt = other.count(key_extractor_(*it));
482 if (cnt != other_cnt)
483 return false;
484
485 it = tmp; // tmp is one past *it's key.
486 }
487
488 return true;
489 }
490
491 ~hash_table()
492 {
493 // Lists are deleted in ~hash_table_bucket.
494 if (table_)
495 delete[] table_;
496 }
497
498 place_type find_insertion_spot(const key_type& key) const
499 {
500 return Policy::find_insertion_spot(*this, key);
501 }
502
503 place_type find_insertion_spot(key_type&& key) const
504 {
505 return Policy::find_insertion_spot(*this, key);
506 }
507
508 const key_type& get_key(const value_type& val) const
509 {
510 return key_extractor_(val);
511 }
512
513 bool keys_equal(const key_type& key, const value_type& val)
514 {
515 return key_eq_(key, key_extractor_(val));
516 }
517
518 bool keys_equal(const key_type& key, const value_type& val) const
519 {
520 return key_eq_(key, key_extractor_(val));
521 }
522
523 hash_table_bucket<value_type, size_type>* table()
524 {
525 return table_;
526 }
527
528 hash_table_bucket<value_type, size_type>* head(size_type idx)
529 {
530 if (idx < bucket_count_)
531 return table_[idx]->head;
532 else
533 return nullptr;
534 }
535
536 void rehash_if_needed()
537 {
538 if (size_ > max_load_factor_ * bucket_count_)
539 rehash(bucket_count_ * bucket_count_growth_factor_);
540 }
541
542 void increment_size()
543 {
544 ++size_;
545
546 rehash_if_needed();
547 }
548
549 void decrement_size()
550 {
551 --size_;
552 }
553
554 private:
555 hash_table_bucket<value_type, size_type>* table_;
556 size_type bucket_count_;
557 size_type size_;
558 hasher hasher_;
559 key_equal key_eq_;
560 key_extract key_extractor_;
561 float max_load_factor_;
562
563 static constexpr float bucket_count_growth_factor_{1.25};
564
565 size_type get_bucket_idx_(const key_type& key) const
566 {
567 return hasher_(key) % bucket_count_;
568 }
569
570 size_type first_filled_bucket_() const
571 {
572 size_type res{};
573 while (res < bucket_count_)
574 {
575 if (table_[res].head)
576 return res;
577 ++res;
578 }
579
580 /**
581 * Note: This is used for iterators,
582 * so we need to return a valid index.
583 * But since table_[0].head is nullptr
584 * we know that if we return 0 the
585 * created iterator will test as equal
586 * to end().
587 */
588 return 0;
589 }
590
591 friend Policy;
592 };
593}
594
595#endif
Note: See TracBrowser for help on using the repository browser.