source: mainline/uspace/lib/cpp/include/internal/hash_table.hpp@ c866a83

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since c866a83 was a2f01c4, checked in by Dzejrou <dzejrou@…>, 7 years ago

cpp: fixed return type on multi policy insert and made return type on insert/emplace in hash_table dependent on the policy

  • Property mode set to 100644
File size: 18.3 KB
Line 
1/*
2 * Copyright (c) 2018 Jaroslav Jindrak
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef LIBCPP_INTERNAL_HASH_TABLE
30#define LIBCPP_INTERNAL_HASH_TABLE
31
32#include <cstdlib>
33#include <internal/list.hpp>
34#include <internal/key_extractors.hpp>
35#include <internal/hash_table_iterators.hpp>
36#include <internal/hash_table_policies.hpp>
37#include <iterator>
38#include <limits>
39#include <memory>
40#include <tuple>
41#include <utility>
42
43namespace std::aux
44{
45 /**
46 * To save code, we're going to implement one hash table
47 * for both unordered_map and unordered_set. To do this,
48 * we create one inner hash table that is oblivious to its
49 * held type (and uses a key extractor to get the key from it)
50 * and two proxies that either use plain Key type or a pair
51 * of a key and a value.
52 * Additionally, we will use policies to represent both single
53 * and multi variants of these containers at once.
54 * Note: I am aware the naming is very unimaginative here,
55 * not my strong side :)
56 */
57
58 template<
59 class Value, class Key, class KeyExtractor,
60 class Hasher, class KeyEq,
61 class Alloc, class Size,
62 class Iterator, class ConstIterator,
63 class LocalIterator, class ConstLocalIterator,
64 class Policy
65 >
66 class hash_table
67 {
68 public:
69 using value_type = Value;
70 using key_type = Key;
71 using size_type = Size;
72 using allocator_type = Alloc;
73 using key_equal = KeyEq;
74 using hasher = Hasher;
75 using key_extract = KeyExtractor;
76
77 using iterator = Iterator;
78 using const_iterator = ConstIterator;
79 using local_iterator = LocalIterator;
80 using const_local_iterator = ConstLocalIterator;
81
82 using node_type = list_node<value_type>;
83
84 using place_type = tuple<
85 hash_table_bucket<value_type, size_type>*,
86 list_node<value_type>*, size_type
87 >;
88
89 hash_table(size_type buckets, float max_load_factor = 1.f)
90 : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
91 bucket_count_{buckets}, size_{}, hasher_{}, key_eq_{},
92 key_extractor_{}, max_load_factor_{max_load_factor}
93 { /* DUMMY BODY */ }
94
95 hash_table(size_type buckets, const hasher& hf, const key_equal& eql,
96 float max_load_factor = 1.f)
97 : table_{new hash_table_bucket<value_type, size_type>[buckets]()},
98 bucket_count_{buckets}, size_{}, hasher_{hf}, key_eq_{eql},
99 key_extractor_{}, max_load_factor_{max_load_factor}
100 { /* DUMMY BODY */ }
101
102 hash_table(const hash_table& other)
103 : hash_table{other.bucket_count_, other.hasher_, other.key_eq_,
104 other.max_load_factor_}
105 {
106 for (const auto& x: other)
107 insert(x);
108 }
109
110 hash_table(hash_table&& other)
111 : table_{other.table_}, bucket_count_{other.bucket_count_},
112 size_{other.size_}, hasher_{move(other.hasher_)},
113 key_eq_{move(other.key_eq_)}, key_extractor_{move(other.key_extractor_)},
114 max_load_factor_{other.max_load_factor_}
115 {
116 other.table_ = nullptr;
117 other.bucket_count_ = size_type{};
118 other.size_ = size_type{};
119 other.max_load_factor_ = 1.f;
120 }
121
122 hash_table& operator=(const hash_table& other)
123 {
124 hash_table tmp{other};
125 tmp.swap(*this);
126
127 return *this;
128 }
129
130 hash_table& operator=(hash_table&& other)
131 {
132 hash_table tmp{move(other)};
133 tmp.swap(*this);
134
135 return *this;
136 }
137
138 bool empty() const noexcept
139 {
140 return size_ == 0;
141 }
142
143 size_type size() const noexcept
144 {
145 return size_;
146 }
147
148 size_type max_size(allocator_type& alloc)
149 {
150 return allocator_traits<allocator_type>::max_size(alloc);
151 }
152
153 iterator begin() noexcept
154 {
155 auto idx = first_filled_bucket_();
156 return iterator{
157 table_, idx, bucket_count_,
158 table_[idx].head
159 };
160 }
161
162 const_iterator begin() const noexcept
163 {
164 return cbegin();
165 }
166
167 iterator end() noexcept
168 {
169 return iterator{};
170 }
171
172 const_iterator end() const noexcept
173 {
174 return cend();
175 }
176
177 const_iterator cbegin() const noexcept
178 {
179 auto idx = first_filled_bucket_();
180 return const_iterator{
181 table_, idx, bucket_count_,
182 table_[idx].head
183 };
184 }
185
186 const_iterator cend() const noexcept
187 {
188 return const_iterator{};
189 }
190
191 template<class... Args>
192 auto emplace(Args&&... args)
193 {
194 return Policy::emplace(*this, forward<Args>(args)...);
195 }
196
197 auto insert(const value_type& val)
198 {
199 return Policy::insert(*this, val);
200 }
201
202 auto insert(value_type&& val)
203 {
204 return Policy::insert(*this, forward<value_type>(val));
205 }
206
207 size_type erase(const key_type& key)
208 {
209 return Policy::erase(*this, key);
210 }
211
212 iterator erase(const_iterator it)
213 {
214 auto node = it.node();
215 auto idx = it.idx();
216
217 /**
218 * Note: This way we will continue on the next bucket
219 * if this is the last element in its bucket.
220 */
221 iterator res{table_, idx, size_, node};
222 ++res;
223
224 if (table_[idx].head == node)
225 {
226 if (node->next != node)
227 table_[idx].head = node->next;
228 else
229 table_[idx].head = nullptr;
230 }
231
232 node->unlink();
233 delete node;
234
235 return res;
236 }
237
238 void clear() noexcept
239 {
240 for (size_type i = 0; i < bucket_count_; ++i)
241 table_[i].clear();
242 size_ = size_type{};
243 }
244
245 void swap(hash_table& other)
246 noexcept(allocator_traits<allocator_type>::is_always_equal::value &&
247 noexcept(swap(declval<Hasher&>(), declval<Hasher&>())) &&
248 noexcept(swap(declval<KeyEq&>(), declval<KeyEq&>())))
249 {
250 std::swap(table_, other.table_);
251 std::swap(bucket_count_, other.bucket_count_);
252 std::swap(size_, other.size_);
253 std::swap(hasher_, other.hasher_);
254 std::swap(key_eq_, other.key_eq_);
255 std::swap(max_load_factor_, other.max_load_factor_);
256 }
257
258 hasher hash_function() const
259 {
260 return hasher_;
261 }
262
263 key_equal key_eq() const
264 {
265 return key_eq_;
266 }
267
268 iterator find(const key_type& key)
269 {
270 auto idx = get_bucket_idx_(key);
271 auto head = table_[idx].head;
272
273 if (!head)
274 return end();
275
276 auto current = head;
277 do
278 {
279 if (key_eq_(key, key_extractor_(current->value)))
280 return iterator{table_, idx, size_, current};
281 current = current->next;
282 }
283 while (current != head);
284
285 return end();
286 }
287
288 const_iterator find(const key_type& key) const
289 {
290 auto idx = get_bucket_idx_(key);
291 auto head = table_[idx].head;
292
293 if (!head)
294 return end();
295
296 auto current = head;
297 do
298 {
299 if (key_eq_(key, key_extractor_(current->value)))
300 return iterator{table_, idx, size_, current};
301 current = current->next;
302 }
303 while (current != head);
304
305 return end();
306 }
307
308 size_type count(const key_type& key) const
309 {
310 return Policy::count(*this, key);
311 }
312
313 pair<iterator, iterator> equal_range(const key_type& key)
314 {
315 return Policy::equal_range(*this, key);
316 }
317
318 pair<const_iterator, const_iterator> equal_range(const key_type& key) const
319 {
320 return Policy::equal_range_const(*this, key);
321 }
322
323 size_type bucket_count() const noexcept
324 {
325 return bucket_count_;
326 }
327
328 size_type max_bucket_count() const noexcept
329 {
330 return numeric_limits<size_type>::max() /
331 sizeof(hash_table_bucket<value_type, size_type>);
332 }
333
334 size_type bucket_size(size_type n) const
335 {
336 return table_[n].size();
337 }
338
339 size_type bucket(const key_type& key) const
340 {
341 return get_bucket_idx_(key);
342 }
343
344 local_iterator begin(size_type n)
345 {
346 return local_iterator{table_[n].head, table_[n].head};
347 }
348
349 const_local_iterator begin(size_type n) const
350 {
351 return cbegin(n);
352 }
353
354 local_iterator end(size_type n)
355 {
356 return local_iterator{};
357 }
358
359 const_local_iterator end(size_type n) const
360 {
361 return cend(n);
362 }
363
364 const_local_iterator cbegin(size_type n) const
365 {
366 return const_local_iterator{table_[n].head, table_[n].head};
367 }
368
369 const_local_iterator cend(size_type n) const
370 {
371 return const_local_iterator{};
372 }
373
374 float load_factor() const noexcept
375 {
376 return size_ / static_cast<float>(bucket_count_);
377 }
378
379 float max_load_factor() const noexcept
380 {
381 return max_load_factor_;
382 }
383
384 void max_load_factor(float factor)
385 {
386 if (factor > 0.f)
387 max_load_factor_ = factor;
388
389 rehash_if_needed();
390 }
391
392 void rehash(size_type count)
393 {
394 if (count < size_ / max_load_factor_)
395 count = size_ / max_load_factor_;
396
397 /**
398 * Note: If an exception is thrown, there
399 * is no effect. Since this is the only
400 * place where an exception (no mem) can
401 * be thrown and no changes to this have been
402 * made, we're ok.
403 */
404 hash_table new_table{count, max_load_factor_};
405
406 for (std::size_t i = 0; i < bucket_count_; ++i)
407 {
408 auto head = table_[i].head;
409 if (!head)
410 continue;
411
412 auto current = head;
413
414 do
415 {
416 auto next = current->next;
417
418 current->next = current;
419 current->prev = current;
420
421 auto where = Policy::find_insertion_spot(
422 new_table, key_extractor_(current->value)
423 );
424
425 /**
426 * Note: We're rehashing, so we know each
427 * key can be inserted.
428 */
429 auto new_bucket = get<0>(where);
430 auto new_successor = get<1>(where);
431
432 if (new_successor)
433 new_successor->append(current);
434 else
435 new_bucket->append(current);
436
437 current = next;
438 } while (current != head);
439
440 table_[i].head = nullptr;
441 }
442
443 new_table.size_ = size_;
444 swap(new_table);
445
446 delete[] new_table.table_;
447 new_table.table_ = nullptr;
448 }
449
450 void reserve(size_type count)
451 {
452 rehash(count / max_load_factor_ + 1);
453 }
454
455 bool is_eq_to(const hash_table& other) const
456 {
457 if (size() != other.size())
458 return false;
459
460 auto it = begin();
461 while (it != end())
462 {
463 /**
464 * For each key K we will check how many
465 * instances of K are there in the table.
466 * Then we will check if the count for K
467 * is equal to that amount.
468 */
469
470 size_type cnt{};
471 auto tmp = it;
472
473 while (key_eq_(key_extractor_(*it), key_extractor_(*tmp)))
474 {
475 ++cnt;
476 if (++tmp == end())
477 break;
478 }
479
480 auto other_cnt = other.count(key_extractor_(*it));
481 if (cnt != other_cnt)
482 return false;
483
484 it = tmp; // tmp is one past *it's key.
485 }
486
487 return true;
488 }
489
490 ~hash_table()
491 {
492 // Lists are deleted in ~hash_table_bucket.
493 if (table_)
494 delete[] table_;
495 }
496
497 place_type find_insertion_spot(const key_type& key) const
498 {
499 return Policy::find_insertion_spot(*this, key);
500 }
501
502 place_type find_insertion_spot(key_type&& key) const
503 {
504 return Policy::find_insertion_spot(*this, key);
505 }
506
507 const key_type& get_key(const value_type& val) const
508 {
509 return key_extractor_(val);
510 }
511
512 bool keys_equal(const key_type& key, const value_type& val)
513 {
514 return key_eq_(key, key_extractor_(val));
515 }
516
517 bool keys_equal(const key_type& key, const value_type& val) const
518 {
519 return key_eq_(key, key_extractor_(val));
520 }
521
522 hash_table_bucket<value_type, size_type>* table()
523 {
524 return table_;
525 }
526
527 hash_table_bucket<value_type, size_type>* head(size_type idx)
528 {
529 if (idx < bucket_count_)
530 return table_[idx]->head;
531 else
532 return nullptr;
533 }
534
535 void rehash_if_needed()
536 {
537 if (size_ > max_load_factor_ * bucket_count_)
538 rehash(bucket_count_ * bucket_count_growth_factor_);
539 }
540
541 void increment_size()
542 {
543 ++size_;
544
545 rehash_if_needed();
546 }
547
548 void decrement_size()
549 {
550 --size_;
551 }
552
553 private:
554 hash_table_bucket<value_type, size_type>* table_;
555 size_type bucket_count_;
556 size_type size_;
557 hasher hasher_;
558 key_equal key_eq_;
559 key_extract key_extractor_;
560 float max_load_factor_;
561
562 static constexpr float bucket_count_growth_factor_{1.25};
563
564 size_type get_bucket_idx_(const key_type& key) const
565 {
566 return hasher_(key) % bucket_count_;
567 }
568
569 size_type first_filled_bucket_() const
570 {
571 size_type res{};
572 while (res < bucket_count_)
573 {
574 if (table_[res].head)
575 return res;
576 ++res;
577 }
578
579 /**
580 * Note: This is used for iterators,
581 * so we need to return a valid index.
582 * But since table_[0].head is nullptr
583 * we know that if we return 0 the
584 * created iterator will test as equal
585 * to end().
586 */
587 return 0;
588 }
589
590 friend Policy;
591 };
592}
593
594#endif
Note: See TracBrowser for help on using the repository browser.