/* * Copyright (c) 2018 Jaroslav Jindrak * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef LIBCPP_INTERNAL_HASH_TABLE #define LIBCPP_INTERNAL_HASH_TABLE #include #include #include #include #include #include namespace std::aux { /** * To save code, we're going to implement one hash table * for both unordered_map and unordered_set. To do this, * we create one inner hash table that is oblivious to its * held type (and uses a key extractor to get the key from it) * and two proxies that either use plain Key type or a pair * of a key and a value. * Note: I am aware the naming is very unimaginative here, * not my strong side :) */ template struct key_value_key_extractor { Key& operator()(pair& p) const noexcept { return p.first; } }; template struct key_no_value_key_extractor { Key& operator()(Key& k) const noexcept { return k; } }; struct key_value_allocator { /* DUMMY BODY */ }; template struct hash_table_bucket { /** * Note: We use a doubly linked list because * we need to use hints, which point to the * element after the hinted spot. */ list_node* head; Size size() const noexcept { // TODO: implement } void append(list_node* node) { if (!head) head = node; else head->append(node); } ~hash_table_bucket() { // TODO: deallocate the entire list } }; struct hash_single_policy { // TODO: umap/uset operations template static typename Table::size_type count(const Table& table, const Key& key) { return table.find(key) == table.end() ? 0 : 1; } template static typename Table::hint_type find_insertion_spot(const Table& table, const Key& key) { auto idx = table.get_bucket_idx_(key); return make_tuple( &table.table_[idx], table.table_[idx].head, idx ); } template static typename Table::size_type erase(Table& table, const Key& key) { auto idx = table.get_bucket_idx_(key); auto head = table.table_[idx].head; auto current = head; do { if (table.key_eq_(key, table.key_extractor_(current->value))) { --table.size_; if (current == head) { if (current->next != head) table.table_[idx].head = current->next; else table.table_[idx].head = nullptr; } current->unlink(); delete current; return 1; } else current = current->next; } while (current != head); return 0; } }; struct hash_multi_policy { // TODO: umultimap/umultiset operations template static typename Table::size_type count(const Table& table, const Key& key) { auto head = table.table_[get_bucket_idx_(key)].head; if (!head) return 0; auto current = head; typename Table::size_type res = 0; do { if (table.key_eq_(key, table.key_extractor_(current->value))) ++res; current = current->next; } while (current != head); return res; } template static typename Table::hint_type find_insertion_spot(const Table& table, const Key& key) { // TODO: find the right bucket, in that, find the right // link (if key is already in it, place the new copy // next to it, otherwise just return head) } template static typename Table::size_type erase(Table& table, const Key& key) { // TODO: erase all items with given key } }; template class hash_table_const_iterator { public: using value_type = Value; using size_type = Size; using const_reference = ConstReference; using const_pointer = ConstPointer; using difference_type = ptrdiff_t; using iterator_category = forward_iterator_tag; hash_table_const_iterator(const hash_table_bucket* table = nullptr, size_type idx = size_type{}, size_type max_idx = size_type{}, const list_node* current = nullptr) : table_{table}, idx_{idx}, max_idx_{max_idx}, current_{current} { /* DUMMY BODY */ } hash_table_const_iterator(const hash_table_const_iterator&) = default; hash_table_const_iterator& operator=(const hash_table_const_iterator&) = default; const_reference operator*() const { return current_->value; } const_pointer operator->() const { return ¤t_->value; } hash_table_const_iterator& operator++() { current_ = current_->next; if (current_ == table_[idx_].head) { if (idx_ < max_idx_) { while (!table_[++idx_].head) { /* DUMMY BODY */ } if (idx_ < max_idx_) current_ = table_[idx_].head; else current_ = nullptr; } else current_ = nullptr; } return *this; } hash_table_const_iterator operator++(int) { auto tmp_current = current_; auto tmp_idx = idx_; current_ = current_->next; if (current_ == table_[idx_].head) { if (idx_ < max_idx_) { while (!table_[++idx_].head) { /* DUMMY BODY */ } if (idx_ < max_idx_) current_ = table_[idx_].head; else current_ = nullptr; } else current_ = nullptr; } return hash_table_const_iterator{ table_, tmp_idx, max_idx_, tmp_current }; } list_node* node() { return const_cast*>(current_); } const list_node* node() const { return current_; } size_type idx() const { return idx_; } private: const hash_table_bucket* table_; size_type idx_; size_type max_idx_; const list_node* current_; }; template bool operator==(const hash_table_const_iterator& lhs, const hash_table_const_iterator& rhs) { return lhs.node() == rhs.node(); } template bool operator!=(const hash_table_const_iterator& lhs, const hash_table_const_iterator& rhs) { return !(lhs == rhs); } template class hash_table_iterator { public: using value_type = Value; using size_type = Size; using reference = Reference; using pointer = Pointer; using difference_type = ptrdiff_t; using iterator_category = forward_iterator_tag; hash_table_iterator(hash_table_bucket* table = nullptr, size_type idx = size_type{}, size_type max_idx = size_type{}, list_node* current = nullptr) : table_{table}, idx_{idx}, max_idx_{max_idx}, current_{current} { /* DUMMY BODY */ } hash_table_iterator(const hash_table_iterator&) = default; hash_table_iterator& operator=(const hash_table_iterator&) = default; reference operator*() { return current_->value; } pointer operator->() { return ¤t_->value; } hash_table_iterator& operator++() { current_ = current_->next; if (current_ == table_[idx_].head) { if (idx_ < max_idx_) { while (!table_[++idx_].head) { /* DUMMY BODY */ } if (idx_ < max_idx_) current_ = table_[idx_].head; else current_ = nullptr; } else current_ = nullptr; } return *this; } hash_table_iterator operator++(int) { auto tmp_current = current_; auto tmp_idx = idx_; current_ = current_->next; if (current_ == table_[idx_].head) { if (idx_ < max_idx_) { while (!table_[++idx_].head) { /* DUMMY BODY */ } if (idx_ < max_idx_) current_ = table_[idx_].head; else current_ = nullptr; } else current_ = nullptr; } return hash_table_iterator{ table_, tmp_idx, max_idx_, tmp_current }; } list_node* node() { return current_; } const list_node* node() const { return current_; } size_type idx() const { return idx_; } template operator hash_table_const_iterator< Value, ConstRef, ConstPtr, Size >() const { return hash_table_const_iterator{ table_, idx_, max_idx_, current_ }; } private: hash_table_bucket* table_; size_type idx_; size_type max_idx_; list_node* current_; }; template bool operator==(const hash_table_iterator& lhs, const hash_table_iterator& rhs) { return lhs.node() == rhs.node(); } template bool operator!=(const hash_table_iterator& lhs, const hash_table_iterator& rhs) { return !(lhs == rhs); } template class hash_table_const_local_iterator { public: using value_type = Value; using const_reference = ConstReference; using const_pointer = ConstPointer; using difference_type = ptrdiff_t; using iterator_category = forward_iterator_tag; // TODO: requirement for forward iterator is default constructibility, fix others! hash_table_const_local_iterator(const list_node* head = nullptr, const list_node* current = nullptr) : head_{head}, current_{current} { /* DUMMY BODY */ } hash_table_const_local_iterator(const hash_table_const_local_iterator&) = default; hash_table_const_local_iterator& operator=(const hash_table_const_local_iterator&) = default; const_reference operator*() const { return current_->value; } const_pointer operator->() const { return ¤t_->value; } hash_table_const_local_iterator& operator++() { current_ = current_->next; if (current_ == head_) current_ = nullptr; return *this; } hash_table_const_local_iterator operator++(int) { auto tmp = current_; current_ = current_->next; if (current_ == head_) current_ = nullptr; return hash_table_const_local_iterator{head_, tmp}; } list_node* node() { return const_cast*>(current_); } const list_node* node() const { return current_; } private: const list_node* head_; const list_node* current_; }; template bool operator==(const hash_table_const_local_iterator& lhs, const hash_table_const_local_iterator& rhs) { return lhs.node() == rhs.node(); } template bool operator!=(const hash_table_const_local_iterator& lhs, const hash_table_const_local_iterator& rhs) { return !(lhs == rhs); } template class hash_table_local_iterator { public: using value_type = Value; using reference = Reference; using pointer = Pointer; using difference_type = ptrdiff_t; using iterator_category = forward_iterator_tag; hash_table_local_iterator(list_node* head = nullptr, list_node* current = nullptr) : head_{head}, current_{current} { /* DUMMY BODY */ } hash_table_local_iterator(const hash_table_local_iterator&) = default; hash_table_local_iterator& operator=(const hash_table_local_iterator&) = default; reference operator*() { return current_->value; } pointer operator->() { return ¤t_->value; } hash_table_local_iterator& operator++() { current_ = current_->next; if (current_ == head_) current_ = nullptr; return *this; } hash_table_local_iterator operator++(int) { auto tmp = current_; current_ = current_->next; if (current_ == head_) current_ = nullptr; return hash_table_local_iterator{head_, tmp}; } list_node* node() { return current_; } const list_node* node() const { return current_; } template operator hash_table_const_local_iterator< Value, ConstRef, ConstPtr >() const { return hash_table_const_local_iterator< value_type, ConstRef, ConstPtr >{head_, current_}; } private: list_node* head_; list_node* current_; }; template bool operator==(const hash_table_local_iterator& lhs, const hash_table_local_iterator& rhs) { return lhs.node() == rhs.node(); } template bool operator!=(const hash_table_local_iterator& lhs, const hash_table_local_iterator& rhs) { return !(lhs == rhs); } template< class Value, class Key, class KeyExtractor, class Hasher, class KeyEq, class Alloc, class Size, class Iterator, class ConstIterator, class LocalIterator, class ConstLocalIterator, class Policy > class hash_table { public: using value_type = Value; using key_type = Key; using size_type = Size; using allocator_type = Alloc; using key_equal = KeyEq; using hasher = Hasher; using key_extract = KeyExtractor; using iterator = Iterator; using const_iterator = ConstIterator; using local_iterator = LocalIterator; using const_local_iterator = ConstLocalIterator; using hint_type = tuple< hash_table_bucket*, list_node*, size_type >; hash_table(size_type buckets, float max_load_factor = 1.f) : table_{new hash_table_bucket[buckets]}, bucket_count_{buckets}, size_{}, hasher_{}, key_eq_{}, key_extractor_{}, max_load_factor_{max_load_factor} { /* DUMMY BODY */ } bool empty() const noexcept { return size_ == 0; } size_type size() const noexcept { return size_; } template size_type max_size(Allocator& alloc) { return allocator_traits::max_size(alloc); } iterator begin() noexcept { return iterator{ table_, size_type{}, bucket_count_, table_[0].head }; } const_iterator begin() const noexcept { return cbegin(); } iterator end() noexcept { return iterator{}; } const_iterator end() const noexcept { return cend(); } const_iterator cbegin() const noexcept { return const_iterator{ table_, size_type{}, bucket_count_, table_[0].head }; } const_iterator cend() const noexcept { return const_iterator{}; } template iterator emplace(Allocator& alloc, Args&&... args) { // TODO: use allocator traits of allocator_type but pass alloc! // TODO: also, try_emplace should be one level above (we don't know // keys) } void insert(const hint_type& where, const value_type& val) { if (!hint_ok_(where)) return; auto node = new list_node{val}; if (get<1>(where) == nullptr) // Append here will create a new head. get<0>(where)->append(node); else // Prepending before an exact position is common in the standard. get<1>(where)->prepend(node); ++size_; // TODO: if we go over max load factor, rehash } void insert(const hint_type& where, value_type&& val) { if (!hint_ok_(where)) return; auto node = new list_node{forward(val)}; if (get<1>(where) == nullptr) get<0>(where)->append(node); else get<1>(where)->prepend(node); ++size_; // TODO: if we go over max load factor, rehash } size_type erase(const key_type& key) { return Policy::erase(*this, key); } iterator erase(const_iterator it) { auto node = it.node(); auto idx = it.idx(); /** * Note: This way we will continue on the next bucket * if this is the last element in its bucket. */ iterator res{table_, idx, size_, node}; ++res; if (table_[idx].head == node) { if (node->next != node) table_[idx].head = node->next; else table_[idx].head = nullptr; } node->unlink(); delete node; return res; } void clear() noexcept { delete[] table_; size_ = size_type{}; table_ = new hash_table_bucket[bucket_count_]; } template void swap(hash_table& other) noexcept(allocator_traits::is_always_equal::value && noexcept(swap(declval(), declval())) && noexcept(swap(declval(), declval()))) { std::swap(table_, other.table_); std::swap(bucket_count_, other.bucket_count_); std::swap(size_, other.size_); std::swap(hasher_, other.hasher_); std::swap(key_eq_, other.key_eq_); std::swap(max_load_factor_, other.max_load_factor_); } hasher hash_function() const { return hasher_; } key_equal key_eq() const { return key_eq_; } iterator find(const key_type& key) { // TODO: implement } const_iterator find(const key_type& key) const { // TODO: implement } size_type count(const key_type& key) const { return Policy::count(*this, key); } pair equal_range(const key_type& key) { // TODO: implement } pair equal_range(const key_type& key) const { // TODO: implement } size_type bucket_count() const noexcept { return bucket_count_; } size_type max_bucket_count() const noexcept { // TODO: implement return 0; } size_type bucket_size(size_type n) const { return table_[n].size(); } size_type bucket(const key_type& key) const { return get_bucket_idx_(key); } local_iterator begin(size_type n) { return local_iterator{table_[n].head, table_[n].head}; } const_local_iterator begin(size_type n) const { return cbegin(n); } local_iterator end(size_type n) { return local_iterator{}; } const_local_iterator end(size_type n) const { return cend(n); } const_local_iterator cbegin(size_type n) const { return const_local_iterator{table_[n].head, table_[n].head}; } const_local_iterator cend(size_type n) const { return const_local_iterator{}; } float load_factor() const noexcept { return size_ / static_cast(bucket_count_); } float max_load_factor() const noexcept { return max_load_factor_; } void max_load_factor(float factor) { /** * Note: According to the standard, this function * can have no effect. */ } void rehash(size_type n) { // TODO: implement // TODO: exception thrown in rehash means the rehash // has no effect, so we create a new table, insert in it // and then swap } void reserve(size_type n) { // TODO: implement } bool is_eq_to(hash_table& other) { // TODO: implement return false; } ~hash_table() { // Lists are deleted in ~hash_table_bucket. delete[] table_; } void set_hint(const_iterator hint) { // TODO: hint_ should be a ptr and we extract it here, // then set it to nullptr after each operation } hint_type find_insertion_spot(const key_type& key) { return Policy::find_insertion_spot(*this, key); } hint_type find_insertion_spot(key_type&& key) { return Policy::find_insertion_spot(*this, key); } /* private: */ hash_table_bucket* table_; size_type bucket_count_; size_type size_; hasher hasher_; key_equal key_eq_; key_extract key_extractor_; float max_load_factor_; size_type get_bucket_idx_(const key_type& key) const { return hasher_(key) % bucket_count_; } bool hint_ok_(const hint_type& hint) { // TODO: pass this to the policy, because the multi policy // will need to check if a similar key is close, // that is something like: // return get<1>(hint)->prev->key == key || !bucket.contains(key) return get<0>(hint) != nullptr && get<2>(hint) < bucket_count_; } // Praise C++11 for this. friend Policy; }; } namespace std { template<> struct allocator_traits { template static void construct(Alloc& alloc, pair* ptr, Args&&... args) { alloc.construct(&ptr->second, forward(args)...); } }; } #endif