aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers
diff options
context:
space:
mode:
authormonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
committermonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
commit06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/containers
parent03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
downloadydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'library/cpp/containers')
-rw-r--r--library/cpp/containers/absl_flat_hash/flat_hash_map.cpp1
-rw-r--r--library/cpp/containers/absl_flat_hash/flat_hash_map.h3
-rw-r--r--library/cpp/containers/absl_flat_hash/flat_hash_set.cpp1
-rw-r--r--library/cpp/containers/absl_flat_hash/flat_hash_set.h3
-rw-r--r--library/cpp/containers/comptrie/loader/loader.cpp1
-rw-r--r--library/cpp/containers/comptrie/loader/loader.h22
-rw-r--r--library/cpp/containers/comptrie/loader/loader_ut.cpp30
-rw-r--r--library/cpp/containers/comptrie/loader/ut/dummy.triebin25 -> 0 bytes
-rw-r--r--library/cpp/containers/dense_hash/dense_hash.h652
-rw-r--r--library/cpp/containers/dense_hash/fwd.h16
-rw-r--r--library/cpp/containers/limited_heap/limited_heap.h62
11 files changed, 738 insertions, 53 deletions
diff --git a/library/cpp/containers/absl_flat_hash/flat_hash_map.cpp b/library/cpp/containers/absl_flat_hash/flat_hash_map.cpp
new file mode 100644
index 0000000000..899b8a2321
--- /dev/null
+++ b/library/cpp/containers/absl_flat_hash/flat_hash_map.cpp
@@ -0,0 +1 @@
+#include "flat_hash_map.h"
diff --git a/library/cpp/containers/absl_flat_hash/flat_hash_map.h b/library/cpp/containers/absl_flat_hash/flat_hash_map.h
new file mode 100644
index 0000000000..62e2dab5fe
--- /dev/null
+++ b/library/cpp/containers/absl_flat_hash/flat_hash_map.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <absl/container/flat_hash_map.h>
diff --git a/library/cpp/containers/absl_flat_hash/flat_hash_set.cpp b/library/cpp/containers/absl_flat_hash/flat_hash_set.cpp
new file mode 100644
index 0000000000..be779b32df
--- /dev/null
+++ b/library/cpp/containers/absl_flat_hash/flat_hash_set.cpp
@@ -0,0 +1 @@
+#include "flat_hash_set.h"
diff --git a/library/cpp/containers/absl_flat_hash/flat_hash_set.h b/library/cpp/containers/absl_flat_hash/flat_hash_set.h
new file mode 100644
index 0000000000..2f0d92a953
--- /dev/null
+++ b/library/cpp/containers/absl_flat_hash/flat_hash_set.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <absl/container/flat_hash_set.h>
diff --git a/library/cpp/containers/comptrie/loader/loader.cpp b/library/cpp/containers/comptrie/loader/loader.cpp
deleted file mode 100644
index 4811e9d521..0000000000
--- a/library/cpp/containers/comptrie/loader/loader.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "loader.h"
diff --git a/library/cpp/containers/comptrie/loader/loader.h b/library/cpp/containers/comptrie/loader/loader.h
deleted file mode 100644
index ee10e9b451..0000000000
--- a/library/cpp/containers/comptrie/loader/loader.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-
-#include <library/cpp/archive/yarchive.h>
-#include <util/generic/string.h>
-#include <util/generic/ptr.h>
-#include <util/generic/yexception.h>
-#include <util/memory/blob.h>
-
-template <class TrieType, size_t N>
-TrieType LoadTrieFromArchive(const TString& key,
- const unsigned char (&data)[N],
- bool ignoreErrors = false) {
- TArchiveReader archive(TBlob::NoCopy(data, sizeof(data)));
- if (archive.Has(key)) {
- TAutoPtr<IInputStream> trie = archive.ObjectByKey(key);
- return TrieType(TBlob::FromStream(*trie));
- }
- if (!ignoreErrors) {
- ythrow yexception() << "Resource " << key << " not found";
- }
- return TrieType();
-}
diff --git a/library/cpp/containers/comptrie/loader/loader_ut.cpp b/library/cpp/containers/comptrie/loader/loader_ut.cpp
deleted file mode 100644
index 345063a31e..0000000000
--- a/library/cpp/containers/comptrie/loader/loader_ut.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <library/cpp/testing/unittest/registar.h>
-#include <library/cpp/containers/comptrie/comptrie.h>
-#include <library/cpp/containers/comptrie/loader/loader.h>
-
-using TDummyTrie = TCompactTrie<char, i32>;
-
-namespace {
- const unsigned char DATA[] = {
-#include "data.inc"
- };
-}
-
-Y_UNIT_TEST_SUITE(ArchiveLoaderTests) {
- Y_UNIT_TEST(BaseTest) {
- TDummyTrie trie = LoadTrieFromArchive<TDummyTrie>("/dummy.trie", DATA, true);
- UNIT_ASSERT_EQUAL(trie.Size(), 3);
-
- const TString TrieKyes[3] = {
- "zero", "one", "two"};
- i32 val = -1;
- for (i32 i = 0; i < 3; ++i) {
- UNIT_ASSERT(trie.Find(TrieKyes[i].data(), TrieKyes[i].size(), &val));
- UNIT_ASSERT_EQUAL(i, val);
- }
-
- UNIT_CHECK_GENERATED_EXCEPTION(
- LoadTrieFromArchive<TDummyTrie>("/noname.trie", DATA),
- yexception);
- }
-}
diff --git a/library/cpp/containers/comptrie/loader/ut/dummy.trie b/library/cpp/containers/comptrie/loader/ut/dummy.trie
deleted file mode 100644
index 4af18add2f..0000000000
--- a/library/cpp/containers/comptrie/loader/ut/dummy.trie
+++ /dev/null
Binary files differ
diff --git a/library/cpp/containers/dense_hash/dense_hash.h b/library/cpp/containers/dense_hash/dense_hash.h
new file mode 100644
index 0000000000..5dae848739
--- /dev/null
+++ b/library/cpp/containers/dense_hash/dense_hash.h
@@ -0,0 +1,652 @@
+#pragma once
+
+#include "fwd.h"
+
+#include <util/generic/utility.h>
+#include <util/generic/vector.h>
+#include <util/generic/mapfindptr.h>
+
+#include <util/str_stl.h>
+#include <util/ysaveload.h>
+
+/*
+ * There are 2 classes in this file:
+ * - TDenseHash - analog of THashMap
+ * - TDenseHashSet - analog of THashSet
+ */
+
+/*
+ * Implements dense-hash, in some circumstances it is a lot (2x) faster than THashMap.
+ * We support only adding new elements.
+ * TKey value equal to EmptyMarker (by default, it is TKey())
+ * can not be inserted into hash - it is used as marker of empty element.
+ * TValue type must be default constructible
+ */
+
+template <class TKey,
+ class TValue,
+ class TKeyHash,
+ size_t MaxLoadFactor,
+ size_t LogInitSize>
+class TDenseHash : public TMapOps<TDenseHash<TKey, TValue, TKeyHash, MaxLoadFactor, LogInitSize>> {
+private:
+ template <class THash, class TVal>
+ class TIteratorBase {
+ friend class TDenseHash;
+
+ template <class THash2, class TVal2>
+ friend class TIteratorBase;
+
+ THash* Hash;
+ size_t Idx;
+
+ // used only to implement end()
+ TIteratorBase(THash* hash, size_t initIdx)
+ : Hash(hash)
+ , Idx(initIdx)
+ {
+ }
+
+ public:
+ TIteratorBase(THash& hash)
+ : Hash(&hash)
+ , Idx(0)
+ {
+ if (Hash->EmptyMarker == Hash->Buckets[Idx].first) {
+ Next();
+ }
+ }
+
+ template <class THash2, class TVal2>
+ TIteratorBase(const TIteratorBase<THash2, TVal2>& it)
+ : Hash(it.Hash)
+ , Idx(it.Idx)
+ {
+ }
+
+ TIteratorBase(const TIteratorBase&) = default;
+
+ static TIteratorBase CreateEmpty() {
+ return TIteratorBase(nullptr, 0);
+ }
+
+ TIteratorBase& operator=(const TIteratorBase&) = default;
+
+ void Next() {
+ ++Idx;
+ while (Idx < Hash->Buckets.size() && Hash->EmptyMarker == Hash->Buckets[Idx].first) {
+ ++Idx;
+ }
+ }
+
+ TIteratorBase& operator++() {
+ Next();
+ return *this;
+ }
+
+ TVal& operator*() {
+ return Hash->Buckets[Idx];
+ }
+
+ TVal* operator->() {
+ return &Hash->Buckets[Idx];
+ }
+
+ const TVal* operator->() const {
+ return &Hash->Buckets[Idx];
+ }
+
+ THash* GetHash() {
+ return Hash;
+ }
+
+ bool operator==(const TIteratorBase& rhs) const {
+ Y_ASSERT(Hash == rhs.Hash);
+ return Idx == rhs.Idx;
+ }
+
+ bool operator!=(const TIteratorBase& rhs) const {
+ return !(*this == rhs);
+ }
+ };
+
+public:
+ using key_type = TKey;
+ using mapped_type = TValue;
+ using value_type = std::pair<const key_type, mapped_type>;
+ using size_type = std::size_t;
+ using difference_type = std::ptrdiff_t;
+ using hasher = TKeyHash;
+ using key_equal = std::equal_to<key_type>; // TODO(tender-bum): template argument
+ // using allocator_type = ...
+ using reference = value_type&;
+ using const_reference = const value_type&;
+ using pointer = value_type*; // TODO(tender-bum): std::allocator_traits<Alloc>::pointer;
+ using const_pointer = const value_type*; // TODO(tender-bum):
+ // std::allocator_traits<Alloc>::const_pointer;
+ using iterator = TIteratorBase<TDenseHash, value_type>;
+ using const_iterator = TIteratorBase<const TDenseHash, const value_type>;
+
+public:
+ TDenseHash(const key_type& emptyMarker = key_type{}, size_type initSize = 0)
+ : EmptyMarker(emptyMarker)
+ {
+ MakeEmpty(initSize);
+ }
+
+ TDenseHash(const TDenseHash&) = default;
+ TDenseHash(TDenseHash&&) = default;
+
+ TDenseHash& operator=(const TDenseHash& rhs) {
+ TDenseHash tmp{ rhs };
+ return *this = std::move(tmp);
+ }
+
+ TDenseHash& operator=(TDenseHash&&) = default;
+
+ friend bool operator==(const TDenseHash& lhs, const TDenseHash& rhs) {
+ return lhs.Size() == rhs.Size() &&
+ AllOf(lhs, [&rhs](const auto& v) {
+ auto it = rhs.find(v.first);
+ return it != rhs.end() && *it == v;
+ });
+ }
+
+ void Clear() {
+ for (auto& bucket : Buckets) {
+ if (bucket.first != EmptyMarker) {
+ SetValue(bucket, EmptyMarker, mapped_type{});
+ }
+ }
+ NumFilled = 0;
+ }
+
+ void MakeEmpty(size_type initSize = 0) {
+ if (!initSize) {
+ initSize = 1 << LogInitSize;
+ } else {
+ initSize = FastClp2(initSize);
+ }
+ BucketMask = initSize - 1;
+ NumFilled = 0;
+ TVector<value_type> tmp;
+ for (size_type i = 0; i < initSize; ++i) {
+ tmp.emplace_back(EmptyMarker, mapped_type{});
+ }
+ tmp.swap(Buckets);
+ GrowThreshold = Max<size_type>(1, initSize * MaxLoadFactor / 100) - 1;
+ }
+
+ template <class K>
+ bool Has(const K& key) const {
+ return ProcessKey<bool>(
+ key,
+ [](size_type) { return true; },
+ [](size_type) { return false; });
+ }
+
+ size_type Capacity() const {
+ return Buckets.capacity();
+ }
+
+ bool Empty() const {
+ return Size() == 0;
+ }
+
+ size_type Size() const {
+ return NumFilled;
+ }
+
+ template <size_type maxFillPercents, size_type logInitSize>
+ void Swap(TDenseHash<key_type, mapped_type, hasher, maxFillPercents, logInitSize>& other) {
+ Buckets.swap(other.Buckets);
+ DoSwap(BucketMask, other.BucketMask);
+ DoSwap(NumFilled, other.NumFilled);
+ DoSwap(GrowThreshold, other.GrowThreshold);
+ DoSwap(EmptyMarker, other.EmptyMarker);
+ }
+
+ void Save(IOutputStream* s) const {
+ // TODO(tender-bum): make SaveLoad great again
+ ::SaveMany(s, BucketMask, NumFilled, GrowThreshold);
+ // We need to do so because Buckets may be serialized as a pod-array
+ // that doesn't correspond to the previous behaviour
+ ::SaveSize(s, Buckets.size());
+ for (const auto& b : Buckets) {
+ ::Save(s, b.first);
+ ::Save(s, b.second);
+ }
+ mapped_type defaultValue{};
+ ::SaveMany(s, EmptyMarker, defaultValue);
+ }
+
+ void Load(IInputStream* s) {
+ // TODO(tender-bum): make SaveLoad great again
+ ::LoadMany(s, BucketMask, NumFilled, GrowThreshold);
+ // We need to do so because we can't load const fields
+ struct TPairMimic {
+ key_type First;
+ mapped_type Second;
+ Y_SAVELOAD_DEFINE(First, Second);
+ };
+ TVector<TPairMimic> tmp;
+ ::Load(s, tmp);
+ Buckets.clear();
+ for (auto& v : tmp) {
+ Buckets.emplace_back(std::move(v.First), std::move(v.Second));
+ }
+ ::Load(s, EmptyMarker);
+ mapped_type defaultValue;
+ ::Load(s, defaultValue);
+ }
+
+public:
+ iterator begin() {
+ return iterator(*this);
+ }
+
+ iterator end() {
+ return iterator(this, Buckets.size());
+ }
+
+ const_iterator begin() const {
+ return const_iterator(*this);
+ }
+
+ const_iterator end() const {
+ return const_iterator(this, Buckets.size());
+ }
+
+ template <class K>
+ iterator find(const K& key) {
+ return ProcessKey<iterator>(
+ key,
+ [&](size_type idx) { return iterator(this, idx); },
+ [&](size_type) { return end(); });
+ }
+
+ template <class K>
+ const_iterator find(const K& key) const {
+ return ProcessKey<const_iterator>(
+ key,
+ [&](size_type idx) { return const_iterator(this, idx); },
+ [&](size_type) { return end(); });
+ }
+
+ template <class K>
+ const TValue& at(const K& key) const {
+ return ProcessKey<const TValue&>(
+ key,
+ [&](size_type idx) -> const TValue& { return Buckets[idx].second; },
+ [&](size_type) -> const TValue& { throw std::out_of_range("TDenseHash: missing key"); });
+ }
+
+ template <class K>
+ TValue& at(const K& key) {
+ return ProcessKey<TValue&>(
+ key,
+ [&](size_type idx) -> TValue& { return Buckets[idx].second; },
+ [&](size_type) -> TValue& { throw std::out_of_range("TDenseHash: missing key"); });
+ }
+
+ bool Grow(size_type to = 0, bool force = false) {
+ if (!to) {
+ to = Buckets.size() * 2;
+ } else {
+ to = FastClp2(to);
+ if (to <= Buckets.size() && !force) {
+ return false;
+ }
+ }
+ TVector<value_type> oldBuckets(Reserve(to));
+ for (size_type i = 0; i < to; ++i) {
+ oldBuckets.emplace_back(EmptyMarker, mapped_type{});
+ }
+ oldBuckets.swap(Buckets);
+
+ BucketMask = Buckets.size() - 1;
+ GrowThreshold = Max<size_type>(1, Buckets.size() * (MaxLoadFactor / 100.f)) - 1;
+
+ for (auto& item : oldBuckets) {
+ if (EmptyMarker != item.first) {
+ SetValue(FindProperBucket(item.first), std::move(item));
+ }
+ }
+ return true;
+ }
+
+ // Grow to size with which GrowThreshold will be higher then passed value
+ //
+ // (to) = (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 2 after conversion to size_type
+ // is not less than x := (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 1 and FastClp2(to) is not less that (to)
+ // (to) * (MaxLoadFactor / 100.f) >= x * (MaxLoadFactor / 100.f) = (desired_num_filled + 2) + (MaxLoadFactor / 100.f).
+ // This require calculations with two or more significand decimal places
+ // to have no less than (desired_num_filled + 2) after second conversion to size_type.
+ // In that case after substracting 1 we got GrowThreshold >= desired_num_filled + 1
+ //
+ bool ReserveSpace(size_type desired_num_filled, bool force = false) {
+ size_type to = Max<size_type>(1, (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 2);
+ return Grow(to, force);
+ }
+
+ // We need this overload because we want to optimize insertion when somebody inserts value_type.
+ // So we don't need to extract the key.
+ // This overload also allows brace enclosed initializer to be inserted.
+ std::pair<iterator, bool> insert(const value_type& t) {
+ size_type hs = hasher{}(t.first);
+ auto p = GetBucketInfo(hs & BucketMask, t.first);
+ if (p.second) {
+ ++NumFilled;
+ if (NumFilled >= GrowThreshold) {
+ Grow();
+ p.first = FindProperBucket(hs & BucketMask, t.first);
+ }
+ SetValue(p.first, t);
+ return { iterator{ this, p.first }, true };
+ }
+ return { iterator{ this, p.first }, false };
+ }
+
+ // We need this overload because we want to optimize insertion when somebody inserts value_type.
+ // So we don't need to extract the key.
+ // This overload also allows brace enclosed initializer to be inserted.
+ std::pair<iterator, bool> insert(value_type&& t) {
+ size_type hs = hasher{}(t.first);
+ auto p = GetBucketInfo(hs & BucketMask, t.first);
+ if (p.second) {
+ ++NumFilled;
+ if (NumFilled >= GrowThreshold) {
+ Grow();
+ p.first = FindProperBucket(hs & BucketMask, t.first);
+ }
+ SetValue(p.first, std::move(t));
+ return { iterator{ this, p.first }, true };
+ }
+ return { iterator{ this, p.first }, false };
+ }
+
+ // Standart integration. This overload is equivalent to emplace(std::forward<P>(p)).
+ template <class P>
+ std::enable_if_t<!std::is_same<std::decay_t<P>, value_type>::value,
+ std::pair<iterator, bool>> insert(P&& p) {
+ return emplace(std::forward<P>(p));
+ }
+
+ // Not really emplace because we need to know the key anyway. So we need to construct value_type.
+ template <class... Args>
+ std::pair<iterator, bool> emplace(Args&&... args) {
+ return insert(value_type{ std::forward<Args>(args)... });
+ }
+
+ template <class K>
+ mapped_type& operator[](K&& key) {
+ size_type hs = hasher{}(key);
+ auto p = GetBucketInfo(hs & BucketMask, key);
+ if (p.second) {
+ ++NumFilled;
+ if (NumFilled >= GrowThreshold) {
+ Grow();
+ p.first = FindProperBucket(hs & BucketMask, key);
+ }
+ SetValue(p.first, std::forward<K>(key), mapped_type{});
+ }
+ return Buckets[p.first].second;
+ }
+
+private:
+ key_type EmptyMarker;
+ size_type NumFilled;
+ size_type BucketMask;
+ size_type GrowThreshold;
+ TVector<value_type> Buckets;
+
+private:
+ // Tricky way to set value of type with const fields
+ template <class... Args>
+ void SetValue(value_type& bucket, Args&&... args) {
+ bucket.~value_type();
+ new (&bucket) value_type(std::forward<Args>(args)...);
+ }
+
+ template <class... Args>
+ void SetValue(size_type idx, Args&&... args) {
+ SetValue(Buckets[idx], std::forward<Args>(args)...);
+ }
+
+ template <class K>
+ size_type FindProperBucket(size_type idx, const K& key) const {
+ return ProcessIndex<size_type>(
+ idx,
+ key,
+ [](size_type idx) { return idx; },
+ [](size_type idx) { return idx; });
+ }
+
+ template <class K>
+ size_type FindProperBucket(const K& key) const {
+ return FindProperBucket(hasher{}(key) & BucketMask, key);
+ }
+
+ // { idx, is_empty }
+ template <class K>
+ std::pair<size_type, bool> GetBucketInfo(size_type idx, const K& key) const {
+ return ProcessIndex<std::pair<size_type, bool>>(
+ idx,
+ key,
+ [](size_type idx) { return std::make_pair(idx, false); },
+ [](size_type idx) { return std::make_pair(idx, true); });
+ }
+
+ template <class R, class K, class OnFound, class OnEmpty>
+ R ProcessIndex(size_type idx, const K& key, OnFound f0, OnEmpty f1) const {
+ for (size_type numProbes = 1; EmptyMarker != Buckets[idx].first; ++numProbes) {
+ if (Buckets[idx].first == key) {
+ return f0(idx);
+ }
+ idx = (idx + numProbes) & BucketMask;
+ }
+ return f1(idx);
+ }
+
+ template <class R, class K, class OnFound, class OnEmpty>
+ R ProcessKey(const K& key, OnFound&& f0, OnEmpty&& f1) const {
+ return ProcessIndex<R>(
+ hasher{}(key) & BucketMask, key, std::forward<OnFound>(f0), std::forward<OnEmpty>(f1));
+ }
+};
+
+template <class TKey,
+ class TKeyHash,
+ size_t MaxLoadFactor,
+ size_t LogInitSize>
+class TDenseHashSet {
+public:
+ TDenseHashSet(const TKey& emptyMarker = TKey(), size_t initSize = 0)
+ : EmptyMarker(emptyMarker)
+ {
+ MakeEmpty(initSize);
+ }
+
+ void Clear() {
+ size_t currentSize = Buckets.size();
+ Buckets.clear();
+ Buckets.resize(currentSize, EmptyMarker);
+ NumFilled = 0;
+ }
+
+ void MakeEmpty(size_t initSize = 0) {
+ if (!initSize) {
+ initSize = 1 << LogInitSize;
+ } else {
+ initSize = FastClp2(initSize);
+ }
+ BucketMask = initSize - 1;
+ NumFilled = 0;
+ TVector<TKey>(initSize, EmptyMarker).swap(Buckets);
+ GrowThreshold = Max<size_t>(1, initSize * MaxLoadFactor / 100) - 1;
+ }
+
+ template <class K>
+ bool Has(const K& key) const {
+ return Buckets[FindBucket(key)] != EmptyMarker;
+ }
+
+ // gets existing item or inserts new
+ template <class K>
+ bool Insert(const K& key) {
+ bool inserted = InsertNoGrow(key);
+ if (inserted) {
+ MaybeGrow();
+ }
+ return inserted;
+ }
+
+ size_t Capacity() const {
+ return Buckets.capacity();
+ }
+
+ bool Empty() const {
+ return Size() == 0;
+ }
+
+ size_t Size() const {
+ return NumFilled;
+ }
+
+ template <size_t maxFillPercents, size_t logInitSize>
+ void Swap(TDenseHashSet<TKey, TKeyHash, maxFillPercents, logInitSize>& other) {
+ Buckets.swap(other.Buckets);
+ DoSwap(BucketMask, other.BucketMask);
+ DoSwap(NumFilled, other.NumFilled);
+ DoSwap(GrowThreshold, other.GrowThreshold);
+ DoSwap(EmptyMarker, other.EmptyMarker);
+ }
+
+ Y_SAVELOAD_DEFINE(BucketMask, NumFilled, GrowThreshold, Buckets, EmptyMarker);
+
+private:
+ template <class THash>
+ class TIteratorBase {
+ friend class TDenseHashSet;
+
+ THash* Hash;
+ size_t Idx;
+
+ // used only to implement end()
+ TIteratorBase(THash* hash, size_t initIdx)
+ : Hash(hash)
+ , Idx(initIdx)
+ {
+ }
+
+ public:
+ TIteratorBase(THash& hash)
+ : Hash(&hash)
+ , Idx(0)
+ {
+ if (Hash->Buckets[Idx] == Hash->EmptyMarker) {
+ Next();
+ }
+ }
+
+ void Next() {
+ ++Idx;
+ while (Idx < Hash->Buckets.size() && Hash->Buckets[Idx] == Hash->EmptyMarker) {
+ ++Idx;
+ }
+ }
+
+ TIteratorBase& operator++() {
+ Next();
+ return *this;
+ }
+
+ bool Initialized() const {
+ return Hash != nullptr;
+ }
+
+ bool Ok() const {
+ return Idx < Hash->Buckets.size();
+ }
+
+ const TKey& operator*() const {
+ return Key();
+ }
+
+ const TKey& Key() const {
+ return Hash->Buckets[Idx];
+ }
+
+ bool operator==(const TIteratorBase& rhs) const {
+ Y_ASSERT(Hash == rhs.Hash);
+ return Idx == rhs.Idx;
+ }
+
+ bool operator!=(const TIteratorBase& rhs) const {
+ return !(*this == rhs);
+ }
+ };
+
+public:
+ typedef TIteratorBase<const TDenseHashSet> TConstIterator;
+
+ TConstIterator begin() const {
+ return TConstIterator(*this);
+ }
+
+ TConstIterator end() const {
+ return TConstIterator(this, Buckets.size());
+ }
+
+private:
+ size_t BucketMask;
+ size_t NumFilled;
+ size_t GrowThreshold;
+ TVector<TKey> Buckets;
+
+ TKey EmptyMarker;
+
+ template <class K>
+ bool InsertNoGrow(const K& key) {
+ size_t idx = FindBucket(key);
+ if (Buckets[idx] == EmptyMarker) {
+ ++NumFilled;
+ Buckets[idx] = key;
+ return true;
+ }
+ return false;
+ }
+
+ bool MaybeGrow() {
+ if (NumFilled < GrowThreshold) {
+ return false;
+ }
+
+ TVector<TKey> oldBuckets(Buckets.size() * 2, EmptyMarker);
+ oldBuckets.swap(Buckets);
+
+ BucketMask = Buckets.size() - 1;
+ GrowThreshold = Max<size_t>(1, Buckets.size() * (MaxLoadFactor / 100.f)) - 1;
+
+ NumFilled = 0;
+ for (const TKey& key : oldBuckets) {
+ if (key != EmptyMarker) {
+ InsertNoGrow(key);
+ }
+ }
+
+ return true;
+ }
+
+ template <class K>
+ size_t FindBucket(const K& key) const {
+ size_t idx = TKeyHash()(key) & BucketMask;
+ for (size_t numProbes = 1; Buckets[idx] != EmptyMarker; ++numProbes) {
+ if (Buckets[idx] == key) {
+ return idx;
+ }
+ idx = (idx + numProbes) & BucketMask;
+ }
+ return idx;
+ }
+};
diff --git a/library/cpp/containers/dense_hash/fwd.h b/library/cpp/containers/dense_hash/fwd.h
new file mode 100644
index 0000000000..3557c3a2de
--- /dev/null
+++ b/library/cpp/containers/dense_hash/fwd.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+template <class TKey,
+ class TValue,
+ class TKeyHash = THash<TKey>,
+ size_t MaxLoadFactor = 50, // in percents
+ size_t LogInitSize = 8>
+class TDenseHash;
+
+template <class TKey,
+ class TKeyHash = THash<TKey>,
+ size_t MaxLoadFactor = 50, // in percents
+ size_t LogInitSize = 8>
+class TDenseHashSet;
diff --git a/library/cpp/containers/limited_heap/limited_heap.h b/library/cpp/containers/limited_heap/limited_heap.h
new file mode 100644
index 0000000000..0110d6978b
--- /dev/null
+++ b/library/cpp/containers/limited_heap/limited_heap.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <util/generic/queue.h>
+#include <util/generic/algorithm.h>
+
+template <class T, class TComparator = TGreater<T>>
+class TLimitedHeap {
+private:
+ size_t MaxSize;
+ TComparator Comparer;
+ TPriorityQueue<T, TVector<T>, TComparator> Internal;
+
+public:
+ TLimitedHeap(size_t maxSize, const TComparator& comp = TComparator())
+ : MaxSize(maxSize)
+ , Comparer(comp)
+ , Internal(Comparer)
+ {
+ Y_ASSERT(maxSize >= 1);
+ }
+
+ const T& GetMin() const {
+ return Internal.top();
+ }
+
+ void PopMin() {
+ Internal.pop();
+ }
+
+ bool Insert(const T& value) {
+ if (GetSize() == MaxSize) {
+ if (Comparer(GetMin(), value)) {
+ return false;
+ } else {
+ PopMin();
+ }
+ }
+
+ Internal.push(value);
+
+ return true;
+ }
+
+ bool IsEmpty() const {
+ return Internal.empty();
+ }
+
+ size_t GetSize() const {
+ return Internal.size();
+ }
+
+ size_t GetMaxSize() const {
+ return MaxSize;
+ }
+
+ void SetMaxSize(size_t newMaxSize) {
+ while (GetSize() > newMaxSize) {
+ PopMin();
+ }
+ MaxSize = newMaxSize;
+ }
+};