aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yt/containers
diff options
context:
space:
mode:
authorqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
committerqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
commit22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch)
treebffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/yt/containers
parent332b99e2173f0425444abb759eebcb2fafaa9209 (diff)
downloadydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz
validate canons without yatest_common
Diffstat (limited to 'library/cpp/yt/containers')
-rw-r--r--library/cpp/yt/containers/sharded_set-inl.h217
-rw-r--r--library/cpp/yt/containers/sharded_set.h69
-rw-r--r--library/cpp/yt/containers/unittests/sharded_set_ut.cpp121
3 files changed, 407 insertions, 0 deletions
diff --git a/library/cpp/yt/containers/sharded_set-inl.h b/library/cpp/yt/containers/sharded_set-inl.h
new file mode 100644
index 0000000000..67d5be58c6
--- /dev/null
+++ b/library/cpp/yt/containers/sharded_set-inl.h
@@ -0,0 +1,217 @@
+#ifndef SHARDED_SET_INL_H_
+#error "Direct inclusion of this file is not allowed, include sharded_set.h"
+// For the sake of sane code completion.
+#include "sharded_set.h"
+#endif
+
+#include <library/cpp/yt/assert/assert.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T, int N, class F, class S>
+class TShardedSet<T, N, F, S>::const_iterator
+{
+private:
+ friend class TShardedSet<T, N, F, S>;
+
+ using TOwner = TShardedSet<T, N, F, S>;
+ using TShardIterator = typename S::const_iterator;
+
+ const TOwner* const Owner_;
+
+ int ShardIndex_;
+ TShardIterator ShardIterator_;
+
+ const_iterator(
+ const TOwner* owner,
+ int shardIndex,
+ TShardIterator shardIterator)
+ : Owner_(owner)
+ , ShardIndex_(shardIndex)
+ , ShardIterator_(shardIterator)
+ { }
+
+ bool IsValid() const
+ {
+ return ShardIterator_ != Owner_->Shards_[ShardIndex_].end();
+ }
+
+ void FastForward()
+ {
+ while (ShardIndex_ != N - 1 && !IsValid()) {
+ ++ShardIndex_;
+ ShardIterator_ = Owner_->Shards_[ShardIndex_].begin();
+ }
+ }
+
+public:
+ using difference_type = typename std::iterator_traits<TShardIterator>::difference_type;
+ using value_type = typename std::iterator_traits<TShardIterator>::value_type;
+ using pointer = typename std::iterator_traits<TShardIterator>::pointer;
+ using reference = typename std::iterator_traits<TShardIterator>::reference;
+ using iterator_category = std::forward_iterator_tag;
+
+ const_iterator& operator++()
+ {
+ ++ShardIterator_;
+ FastForward();
+
+ return *this;
+ }
+
+ const_iterator operator++(int)
+ {
+ auto result = *this;
+
+ ++ShardIterator_;
+ FastForward();
+
+ return result;
+ }
+
+ bool operator==(const const_iterator& rhs) const
+ {
+ return
+ ShardIndex_ == rhs.ShardIndex_ &&
+ ShardIterator_ == rhs.ShardIterator_;
+ }
+
+ bool operator!=(const const_iterator& rhs) const
+ {
+ return !(*this == rhs);
+ }
+
+ const T& operator*() const
+ {
+ return *ShardIterator_;
+ }
+
+ const T* operator->() const
+ {
+ return &operator*();
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class T, int N, class F, class S>
+TShardedSet<T, N, F, S>::TShardedSet(F elementToShard)
+ : ElementToShard_(elementToShard)
+{ }
+
+template <class T, int N, class F, class S>
+bool TShardedSet<T, N, F, S>::empty() const
+{
+ return size() == 0;
+}
+
+template <class T, int N, class F, class S>
+typename TShardedSet<T, N, F, S>::size_type TShardedSet<T, N, F, S>::size() const
+{
+ size_type result = 0;
+ for (const auto& shard : Shards_) {
+ result += shard.size();
+ }
+
+ return result;
+}
+
+template <class T, int N, class F, class S>
+const T& TShardedSet<T, N, F, S>::front() const
+{
+ return *begin();
+}
+
+template <class T, int N, class F, class S>
+typename TShardedSet<T, N, F, S>::size_type TShardedSet<T, N, F, S>::count(const T& value) const
+{
+ return GetShard(value).count(value);
+}
+
+template <class T, int N, class F, class S>
+bool TShardedSet<T, N, F, S>::contains(const T& value) const
+{
+ return GetShard(value).contains(value);
+}
+
+template <class T, int N, class F, class S>
+std::pair<typename TShardedSet<T, N, F, S>::const_iterator, bool> TShardedSet<T, N, F, S>::insert(const T& value)
+{
+ auto shardIndex = ElementToShard_(value);
+ auto& shard = Shards_[shardIndex];
+ auto [shardIterator, inserted] = shard.insert(value);
+
+ const_iterator iterator(this, shardIndex, shardIterator);
+ return {iterator, inserted};
+}
+
+template <class T, int N, class F, class S>
+bool TShardedSet<T, N, F, S>::erase(const T& value)
+{
+ return GetShard(value).erase(value);
+}
+
+template <class T, int N, class F, class S>
+void TShardedSet<T, N, F, S>::clear()
+{
+ for (auto& shard : Shards_) {
+ shard.clear();
+ }
+}
+
+template <class T, int N, class F, class S>
+typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::begin() const
+{
+ const_iterator iterator(this, /*shardIndex*/ 0, /*shardIterator*/ Shards_[0].begin());
+ iterator.FastForward();
+
+ return iterator;
+}
+
+template <class T, int N, class F, class S>
+typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::cbegin() const
+{
+ return begin();
+}
+
+template <class T, int N, class F, class S>
+typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::end() const
+{
+ return const_iterator(this, /*shardIndex*/ N - 1, /*shardIterator*/ Shards_[N - 1].end());
+}
+
+template <class T, int N, class F, class S>
+typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::cend() const
+{
+ return end();
+}
+
+template <class T, int N, class F, class S>
+const S& TShardedSet<T, N, F, S>::Shard(int shardIndex) const
+{
+ return Shards_[shardIndex];
+}
+
+template <class T, int N, class F, class S>
+S& TShardedSet<T, N, F, S>::MutableShard(int shardIndex)
+{
+ return Shards_[shardIndex];
+}
+
+template <class T, int N, class F, class S>
+S& TShardedSet<T, N, F, S>::GetShard(const T& value)
+{
+ return Shards_[ElementToShard_(value)];
+}
+
+template <class T, int N, class F, class S>
+const S& TShardedSet<T, N, F, S>::GetShard(const T& value) const
+{
+ return Shards_[ElementToShard_(value)];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yt/containers/sharded_set.h b/library/cpp/yt/containers/sharded_set.h
new file mode 100644
index 0000000000..fa24893aa4
--- /dev/null
+++ b/library/cpp/yt/containers/sharded_set.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <util/generic/hash_set.h>
+
+#include <array>
+#include <cstddef>
+#include <utility>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! A set that stores elements divided into fixed amount of shards.
+//! Provides access to whole set and particular shards.
+//! The interface is pretty minimalistic, feel free to extend it when needed.
+template <class T, int N, class F, class S = THashSet<T>>
+class TShardedSet
+{
+public:
+ using size_type = size_t;
+ using difference_type = ptrdiff_t;
+
+ using value_type = T;
+
+ class const_iterator;
+
+ explicit TShardedSet(F elementToShard = F());
+
+ [[nodiscard]] bool empty() const;
+
+ size_type size() const;
+
+ const T& front() const;
+
+ size_type count(const T& value) const;
+
+ bool contains(const T& value) const;
+
+ std::pair<const_iterator, bool> insert(const T& value);
+
+ bool erase(const T& value);
+
+ void clear();
+
+ const_iterator begin() const;
+ const_iterator cbegin() const;
+
+ const_iterator end() const;
+ const_iterator cend() const;
+
+ const S& Shard(int shardIndex) const;
+ S& MutableShard(int shardIndex);
+
+private:
+ std::array<S, N> Shards_;
+
+ const F ElementToShard_;
+
+ S& GetShard(const T& value);
+ const S& GetShard(const T& value) const;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define SHARDED_SET_INL_H_
+#include "sharded_set-inl.h"
+#undef SHARDED_SET_INL_H_
diff --git a/library/cpp/yt/containers/unittests/sharded_set_ut.cpp b/library/cpp/yt/containers/unittests/sharded_set_ut.cpp
new file mode 100644
index 0000000000..2c4f8c5935
--- /dev/null
+++ b/library/cpp/yt/containers/unittests/sharded_set_ut.cpp
@@ -0,0 +1,121 @@
+#include <library/cpp/yt/containers/sharded_set.h>
+
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <random>
+
+namespace NYT {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TIntToShard
+{
+ int operator()(int value) const
+ {
+ return value % 16;
+ }
+};
+
+using TSet = TShardedSet<int, 16, TIntToShard>;
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(CompactSetTest, Insert)
+{
+ TSet set;
+
+ for (int i = 0; i < 4; i++) {
+ set.insert(i);
+ }
+
+ for (int i = 0; i < 4; i++) {
+ set.insert(i);
+ }
+
+ EXPECT_EQ(4u, set.size());
+
+ for (int i = 0; i < 4; i++)
+ EXPECT_EQ(1u, set.count(i));
+
+ EXPECT_EQ(0u, set.count(4));
+}
+
+TEST(CompactSetTest, Erase)
+{
+ TSet set;
+
+ for (int i = 0; i < 8; i++) {
+ set.insert(i);
+ }
+
+ EXPECT_EQ(8u, set.size());
+
+ // Remove elements one by one and check if all other elements are still there.
+ for (int i = 0; i < 8; i++) {
+ EXPECT_EQ(1u, set.count(i));
+ EXPECT_TRUE(set.erase(i));
+ EXPECT_EQ(0u, set.count(i));
+ EXPECT_EQ(8u - i - 1, set.size());
+ for (int j = i + 1; j < 8; j++) {
+ EXPECT_EQ(1u, set.count(j));
+ }
+ }
+
+ EXPECT_EQ(0u, set.count(8));
+}
+
+TEST(CompactSetTest, StressTest)
+{
+ TSet set;
+
+ constexpr int Iterations = 1'000'000;
+ constexpr int Values = 128;
+
+ THashSet<int> values;
+
+ auto checkEverything = [&] {
+ EXPECT_EQ(values.size(), set.size());
+ EXPECT_EQ(values.empty(), set.empty());
+ EXPECT_EQ(values, THashSet<int>(set.begin(), set.end()));
+
+ std::array<THashSet<int>, 16> shards;
+ for (int value : values) {
+ shards[value % 16].insert(value);
+ }
+ for (int shardIndex = 0; shardIndex < 16; ++shardIndex) {
+ EXPECT_EQ(shards[shardIndex], set.Shard(shardIndex));
+ }
+
+ for (int value = 0; value < Values; ++value) {
+ EXPECT_EQ(values.contains(value), set.contains(value));
+ EXPECT_EQ(values.count(value), set.count(value));
+ }
+ };
+
+ std::mt19937_64 rng(42);
+
+ for (int iteration = 0; iteration < Iterations; ++iteration) {
+ if (rng() % 100 == 0) {
+ set.clear();
+ values.clear();
+ checkEverything();
+ }
+
+ int value = rng() % Values;
+ if (rng() % 2 == 0) {
+ set.insert(value);
+ values.insert(value);
+ } else {
+ set.erase(value);
+ values.erase(value);
+ }
+
+ checkEverything();
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT