summaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h
diff options
context:
space:
mode:
authorAlexSm <[email protected]>2024-01-04 15:09:05 +0100
committerGitHub <[email protected]>2024-01-04 15:09:05 +0100
commitdab291146f6cd7d35684e3a1150e5bb1c412982c (patch)
tree36ef35f6cacb6432845a4a33f940c95871036b32 /contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h
parent63660ad5e7512029fd0218e7a636580695a24e1f (diff)
Library import 5, delete go dependencies (#832)
* Library import 5, delete go dependencies * Fix yt client
Diffstat (limited to 'contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h')
-rw-r--r--contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h348
1 files changed, 0 insertions, 348 deletions
diff --git a/contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h b/contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h
deleted file mode 100644
index a60655e4072..00000000000
--- a/contrib/clickhouse/src/Common/HashTable/TwoLevelHashTable.h
+++ /dev/null
@@ -1,348 +0,0 @@
-#pragma once
-
-#include <Common/HashTable/HashTable.h>
-
-
-/** Two-level hash table.
- * Represents 256 (or 1ULL << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
- * To determine which one to use, one of the bytes of the hash function is taken.
- *
- * Usually works a little slower than a simple hash table.
- * However, it has advantages in some cases:
- * - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
- * - delay during resizes is amortized, since the small hash tables will be resized separately;
- * - in theory, resizes are cache-local in a larger range of sizes.
- */
-
-template <size_t initial_size_degree = 8>
-struct TwoLevelHashTableGrower : public HashTableGrowerWithPrecalculation<initial_size_degree>
-{
- /// Increase the size of the hash table.
- void increaseSize() { this->increaseSizeDegree(this->sizeDegree() >= 15 ? 1 : 2); }
-};
-
-template
-<
- typename Key,
- typename Cell,
- typename Hash,
- typename Grower,
- typename Allocator,
- typename ImplTable = HashTable<Key, Cell, Hash, Grower, Allocator>,
- size_t BITS_FOR_BUCKET = 8
->
-class TwoLevelHashTable :
- private boost::noncopyable,
- protected Hash /// empty base optimization
-{
-protected:
- friend class const_iterator;
- friend class iterator;
-
- using HashValue = size_t;
- using Self = TwoLevelHashTable;
-public:
- using Impl = ImplTable;
-
- static constexpr UInt32 NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
- static constexpr UInt32 MAX_BUCKET = NUM_BUCKETS - 1;
-
- size_t hash(const Key & x) const { return Hash::operator()(x); }
-
- /// NOTE Bad for hash tables with more than 2^32 cells.
- static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
-
-protected:
- typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
- {
- while (bucket != NUM_BUCKETS && impls[bucket].empty())
- ++bucket;
-
- if (bucket != NUM_BUCKETS)
- return impls[bucket].begin();
-
- --bucket;
- return impls[MAX_BUCKET].end();
- }
-
- typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
- {
- while (bucket != NUM_BUCKETS && impls[bucket].empty())
- ++bucket;
-
- if (bucket != NUM_BUCKETS)
- return impls[bucket].begin();
-
- --bucket;
- return impls[MAX_BUCKET].end();
- }
-
-public:
- using key_type = typename Impl::key_type;
- using mapped_type = typename Impl::mapped_type;
- using value_type = typename Impl::value_type;
- using cell_type = typename Impl::cell_type;
-
- using LookupResult = typename Impl::LookupResult;
- using ConstLookupResult = typename Impl::ConstLookupResult;
-
- Impl impls[NUM_BUCKETS];
-
-
- TwoLevelHashTable() = default;
-
- explicit TwoLevelHashTable(size_t size_hint)
- {
- for (auto & impl : impls)
- impl.reserve(size_hint / NUM_BUCKETS);
- }
-
- /// Copy the data from another (normal) hash table. It should have the same hash function.
- template <typename Source>
- explicit TwoLevelHashTable(const Source & src)
- {
- typename Source::const_iterator it = src.begin();
-
- /// It is assumed that the zero key (stored separately) is first in iteration order.
- if (it != src.end() && it.getPtr()->isZero(src))
- {
- insert(it->getValue());
- ++it;
- }
-
- for (; it != src.end(); ++it)
- {
- const Cell * cell = it.getPtr();
- size_t hash_value = cell->getHash(src);
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].insertUniqueNonZero(cell, hash_value);
- }
- }
-
-
- class iterator /// NOLINT
- {
- Self * container{};
- size_t bucket{};
- typename Impl::iterator current_it{};
-
- friend class TwoLevelHashTable;
-
- iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_)
- : container(container_), bucket(bucket_), current_it(current_it_) {}
-
- public:
- iterator() = default;
-
- bool operator== (const iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
- bool operator!= (const iterator & rhs) const { return !(*this == rhs); }
-
- iterator & operator++()
- {
- ++current_it;
- if (current_it == container->impls[bucket].end())
- {
- ++bucket;
- current_it = container->beginOfNextNonEmptyBucket(bucket);
- }
-
- return *this;
- }
-
- Cell & operator* () const { return *current_it; }
- Cell * operator->() const { return current_it.getPtr(); }
-
- Cell * getPtr() const { return current_it.getPtr(); }
- size_t getHash() const { return current_it.getHash(); }
- };
-
-
- class const_iterator /// NOLINT
- {
- const Self * container{};
- size_t bucket{};
- typename Impl::const_iterator current_it{};
-
- friend class TwoLevelHashTable;
-
- const_iterator(const Self * container_, size_t bucket_, typename Impl::const_iterator current_it_)
- : container(container_), bucket(bucket_), current_it(current_it_)
- {
- }
-
- public:
- const_iterator() = default;
- const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {} /// NOLINT
-
- bool operator== (const const_iterator & rhs) const { return bucket == rhs.bucket && current_it == rhs.current_it; }
- bool operator!= (const const_iterator & rhs) const { return !(*this == rhs); }
-
- const_iterator & operator++()
- {
- ++current_it;
- if (current_it == container->impls[bucket].end())
- {
- ++bucket;
- current_it = container->beginOfNextNonEmptyBucket(bucket);
- }
-
- return *this;
- }
-
- const Cell & operator* () const { return *current_it; }
- const Cell * operator->() const { return current_it->getPtr(); }
-
- const Cell * getPtr() const { return current_it.getPtr(); }
- size_t getHash() const { return current_it.getHash(); }
- };
-
-
- const_iterator begin() const
- {
- size_t buck = 0;
- typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck);
- return { this, buck, impl_it };
- }
-
- iterator begin()
- {
- size_t buck = 0;
- typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck);
- return { this, buck, impl_it };
- }
-
- const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
- iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
-
-
- /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
- std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x)
- {
- size_t hash_value = hash(Cell::getKey(x));
-
- std::pair<LookupResult, bool> res;
- emplace(Cell::getKey(x), res.first, res.second, hash_value);
-
- if (res.second)
- res.first->setMapped(x);
-
- return res;
- }
-
- template <typename KeyHolder>
- void ALWAYS_INLINE prefetch(KeyHolder && key_holder) const
- {
- const auto & key = keyHolderGetKey(key_holder);
- const auto key_hash = hash(key);
- const auto bucket = getBucketFromHash(key_hash);
- impls[bucket].prefetchByHash(key_hash);
- }
-
- /** Insert the key,
- * return an iterator to a position that can be used for `placement new` of value,
- * as well as the flag - whether a new key was inserted.
- *
- * You have to make `placement new` values if you inserted a new key,
- * since when destroying a hash table, the destructor will be invoked for it!
- *
- * Example usage:
- *
- * Map::iterator it;
- * bool inserted;
- * map.emplace(key, it, inserted);
- * if (inserted)
- * new(&it->second) Mapped(value);
- */
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
- {
- size_t hash_value = hash(keyHolderGetKey(key_holder));
- emplace(key_holder, it, inserted, hash_value);
- }
-
-
- /// Same, but with a precalculated values of hash function.
- template <typename KeyHolder>
- void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it,
- bool & inserted, size_t hash_value)
- {
- size_t buck = getBucketFromHash(hash_value);
- impls[buck].emplace(key_holder, it, inserted, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x, size_t hash_value)
- {
- size_t buck = getBucketFromHash(hash_value);
- return impls[buck].find(x, hash_value);
- }
-
- ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const
- {
- return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
- }
-
- LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); }
-
- ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); }
-
-
- void write(DB::WriteBuffer & wb) const
- {
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- impls[i].write(wb);
- }
-
- void writeText(DB::WriteBuffer & wb) const
- {
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::writeChar(',', wb);
- impls[i].writeText(wb);
- }
- }
-
- void read(DB::ReadBuffer & rb)
- {
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- impls[i].read(rb);
- }
-
- void readText(DB::ReadBuffer & rb)
- {
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- {
- if (i != 0)
- DB::assertChar(',', rb);
- impls[i].readText(rb);
- }
- }
-
-
- size_t size() const
- {
- size_t res = 0;
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].size();
-
- return res;
- }
-
- bool empty() const
- {
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- if (!impls[i].empty())
- return false;
-
- return true;
- }
-
- size_t getBufferSizeInBytes() const
- {
- size_t res = 0;
- for (UInt32 i = 0; i < NUM_BUCKETS; ++i)
- res += impls[i].getBufferSizeInBytes();
-
- return res;
- }
-};