diff options
author | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
---|---|---|
committer | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
commit | fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a (patch) | |
tree | b8cbc1deb00309c7f1a7ab6df520a76cf0b5c6d7 /library/cpp/yt/containers | |
parent | 7bf166b1a7ed0af927f230022b245af618e998c1 (diff) | |
download | ydb-fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a.tar.gz |
YT-19324: move YT provider to ydb/library/yql
This commit is formed by the following script: https://paste.yandex-team.ru/6f92e4b8-efc5-4d34-948b-15ee2accd7e7/text.
This commit has zero effect on all projects that depend on YQL.
The summary of changes:
- `yql/providers/yt -> ydb/library/yql/providers/yt `- the whole implementation of YT provider is moved into YDB code base for further export as a part of YT YQL plugin shared library;
- `yql/providers/stat/{expr_nodes,uploader} -> ydb/library/yql/providers/stat/{expr_nodes,uploader}` - a small interface without implementation and the description of stat expr nodes;
- `yql/core/extract_predicate/ut -> ydb/library/yql/core/extract_predicate/ut`;
- `yql/core/{ut,ut_common} -> ydb/library/yql/core/{ut,ut_common}`;
- `yql/core` is gone;
- `yql/library/url_preprocessing -> ydb/library/yql/core/url_preprocessing`.
**NB**: all new targets inside `ydb/` are under `IF (NOT CMAKE_EXPORT)` clause which disables them from open-source cmake generation and ya make build. They will be enabled in the subsequent commits.
Diffstat (limited to 'library/cpp/yt/containers')
-rw-r--r-- | library/cpp/yt/containers/sharded_set-inl.h | 217 | ||||
-rw-r--r-- | library/cpp/yt/containers/sharded_set.h | 69 | ||||
-rw-r--r-- | library/cpp/yt/containers/unittests/sharded_set_ut.cpp | 121 | ||||
-rw-r--r-- | library/cpp/yt/containers/unittests/ya.make | 15 |
4 files changed, 422 insertions, 0 deletions
diff --git a/library/cpp/yt/containers/sharded_set-inl.h b/library/cpp/yt/containers/sharded_set-inl.h new file mode 100644 index 0000000000..67d5be58c6 --- /dev/null +++ b/library/cpp/yt/containers/sharded_set-inl.h @@ -0,0 +1,217 @@ +#ifndef SHARDED_SET_INL_H_ +#error "Direct inclusion of this file is not allowed, include sharded_set.h" +// For the sake of sane code completion. +#include "sharded_set.h" +#endif + +#include <library/cpp/yt/assert/assert.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +template <class T, int N, class F, class S> +class TShardedSet<T, N, F, S>::const_iterator +{ +private: + friend class TShardedSet<T, N, F, S>; + + using TOwner = TShardedSet<T, N, F, S>; + using TShardIterator = typename S::const_iterator; + + const TOwner* const Owner_; + + int ShardIndex_; + TShardIterator ShardIterator_; + + const_iterator( + const TOwner* owner, + int shardIndex, + TShardIterator shardIterator) + : Owner_(owner) + , ShardIndex_(shardIndex) + , ShardIterator_(shardIterator) + { } + + bool IsValid() const + { + return ShardIterator_ != Owner_->Shards_[ShardIndex_].end(); + } + + void FastForward() + { + while (ShardIndex_ != N - 1 && !IsValid()) { + ++ShardIndex_; + ShardIterator_ = Owner_->Shards_[ShardIndex_].begin(); + } + } + +public: + using difference_type = typename std::iterator_traits<TShardIterator>::difference_type; + using value_type = typename std::iterator_traits<TShardIterator>::value_type; + using pointer = typename std::iterator_traits<TShardIterator>::pointer; + using reference = typename std::iterator_traits<TShardIterator>::reference; + using iterator_category = std::forward_iterator_tag; + + const_iterator& operator++() + { + ++ShardIterator_; + FastForward(); + + return *this; + } + + const_iterator operator++(int) + { + auto result = *this; + + ++ShardIterator_; + FastForward(); + + return result; + } + + bool operator==(const const_iterator& rhs) const + { + return + ShardIndex_ == rhs.ShardIndex_ && + ShardIterator_ == rhs.ShardIterator_; + } + + bool operator!=(const const_iterator& rhs) const + { + return !(*this == rhs); + } + + const T& operator*() const + { + return *ShardIterator_; + } + + const T* operator->() const + { + return &operator*(); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <class T, int N, class F, class S> +TShardedSet<T, N, F, S>::TShardedSet(F elementToShard) + : ElementToShard_(elementToShard) +{ } + +template <class T, int N, class F, class S> +bool TShardedSet<T, N, F, S>::empty() const +{ + return size() == 0; +} + +template <class T, int N, class F, class S> +typename TShardedSet<T, N, F, S>::size_type TShardedSet<T, N, F, S>::size() const +{ + size_type result = 0; + for (const auto& shard : Shards_) { + result += shard.size(); + } + + return result; +} + +template <class T, int N, class F, class S> +const T& TShardedSet<T, N, F, S>::front() const +{ + return *begin(); +} + +template <class T, int N, class F, class S> +typename TShardedSet<T, N, F, S>::size_type TShardedSet<T, N, F, S>::count(const T& value) const +{ + return GetShard(value).count(value); +} + +template <class T, int N, class F, class S> +bool TShardedSet<T, N, F, S>::contains(const T& value) const +{ + return GetShard(value).contains(value); +} + +template <class T, int N, class F, class S> +std::pair<typename TShardedSet<T, N, F, S>::const_iterator, bool> TShardedSet<T, N, F, S>::insert(const T& value) +{ + auto shardIndex = ElementToShard_(value); + auto& shard = Shards_[shardIndex]; + auto [shardIterator, inserted] = shard.insert(value); + + const_iterator iterator(this, shardIndex, shardIterator); + return {iterator, inserted}; +} + +template <class T, int N, class F, class S> +bool TShardedSet<T, N, F, S>::erase(const T& value) +{ + return GetShard(value).erase(value); +} + +template <class T, int N, class F, class S> +void TShardedSet<T, N, F, S>::clear() +{ + for (auto& shard : Shards_) { + shard.clear(); + } +} + +template <class T, int N, class F, class S> +typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::begin() const +{ + const_iterator iterator(this, /*shardIndex*/ 0, /*shardIterator*/ Shards_[0].begin()); + iterator.FastForward(); + + return iterator; +} + +template <class T, int N, class F, class S> +typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::cbegin() const +{ + return begin(); +} + +template <class T, int N, class F, class S> +typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::end() const +{ + return const_iterator(this, /*shardIndex*/ N - 1, /*shardIterator*/ Shards_[N - 1].end()); +} + +template <class T, int N, class F, class S> +typename TShardedSet<T, N, F, S>::const_iterator TShardedSet<T, N, F, S>::cend() const +{ + return end(); +} + +template <class T, int N, class F, class S> +const S& TShardedSet<T, N, F, S>::Shard(int shardIndex) const +{ + return Shards_[shardIndex]; +} + +template <class T, int N, class F, class S> +S& TShardedSet<T, N, F, S>::MutableShard(int shardIndex) +{ + return Shards_[shardIndex]; +} + +template <class T, int N, class F, class S> +S& TShardedSet<T, N, F, S>::GetShard(const T& value) +{ + return Shards_[ElementToShard_(value)]; +} + +template <class T, int N, class F, class S> +const S& TShardedSet<T, N, F, S>::GetShard(const T& value) const +{ + return Shards_[ElementToShard_(value)]; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/containers/sharded_set.h b/library/cpp/yt/containers/sharded_set.h new file mode 100644 index 0000000000..fa24893aa4 --- /dev/null +++ b/library/cpp/yt/containers/sharded_set.h @@ -0,0 +1,69 @@ +#pragma once + +#include <util/generic/hash_set.h> + +#include <array> +#include <cstddef> +#include <utility> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +//! A set that stores elements divided into fixed amount of shards. +//! Provides access to whole set and particular shards. +//! The interface is pretty minimalistic, feel free to extend it when needed. +template <class T, int N, class F, class S = THashSet<T>> +class TShardedSet +{ +public: + using size_type = size_t; + using difference_type = ptrdiff_t; + + using value_type = T; + + class const_iterator; + + explicit TShardedSet(F elementToShard = F()); + + [[nodiscard]] bool empty() const; + + size_type size() const; + + const T& front() const; + + size_type count(const T& value) const; + + bool contains(const T& value) const; + + std::pair<const_iterator, bool> insert(const T& value); + + bool erase(const T& value); + + void clear(); + + const_iterator begin() const; + const_iterator cbegin() const; + + const_iterator end() const; + const_iterator cend() const; + + const S& Shard(int shardIndex) const; + S& MutableShard(int shardIndex); + +private: + std::array<S, N> Shards_; + + const F ElementToShard_; + + S& GetShard(const T& value); + const S& GetShard(const T& value) const; +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define SHARDED_SET_INL_H_ +#include "sharded_set-inl.h" +#undef SHARDED_SET_INL_H_ diff --git a/library/cpp/yt/containers/unittests/sharded_set_ut.cpp b/library/cpp/yt/containers/unittests/sharded_set_ut.cpp new file mode 100644 index 0000000000..2c4f8c5935 --- /dev/null +++ b/library/cpp/yt/containers/unittests/sharded_set_ut.cpp @@ -0,0 +1,121 @@ +#include <library/cpp/yt/containers/sharded_set.h> + +#include <library/cpp/testing/gtest/gtest.h> + +#include <random> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +struct TIntToShard +{ + int operator()(int value) const + { + return value % 16; + } +}; + +using TSet = TShardedSet<int, 16, TIntToShard>; + +//////////////////////////////////////////////////////////////////////////////// + +TEST(CompactSetTest, Insert) +{ + TSet set; + + for (int i = 0; i < 4; i++) { + set.insert(i); + } + + for (int i = 0; i < 4; i++) { + set.insert(i); + } + + EXPECT_EQ(4u, set.size()); + + for (int i = 0; i < 4; i++) + EXPECT_EQ(1u, set.count(i)); + + EXPECT_EQ(0u, set.count(4)); +} + +TEST(CompactSetTest, Erase) +{ + TSet set; + + for (int i = 0; i < 8; i++) { + set.insert(i); + } + + EXPECT_EQ(8u, set.size()); + + // Remove elements one by one and check if all other elements are still there. + for (int i = 0; i < 8; i++) { + EXPECT_EQ(1u, set.count(i)); + EXPECT_TRUE(set.erase(i)); + EXPECT_EQ(0u, set.count(i)); + EXPECT_EQ(8u - i - 1, set.size()); + for (int j = i + 1; j < 8; j++) { + EXPECT_EQ(1u, set.count(j)); + } + } + + EXPECT_EQ(0u, set.count(8)); +} + +TEST(CompactSetTest, StressTest) +{ + TSet set; + + constexpr int Iterations = 1'000'000; + constexpr int Values = 128; + + THashSet<int> values; + + auto checkEverything = [&] { + EXPECT_EQ(values.size(), set.size()); + EXPECT_EQ(values.empty(), set.empty()); + EXPECT_EQ(values, THashSet<int>(set.begin(), set.end())); + + std::array<THashSet<int>, 16> shards; + for (int value : values) { + shards[value % 16].insert(value); + } + for (int shardIndex = 0; shardIndex < 16; ++shardIndex) { + EXPECT_EQ(shards[shardIndex], set.Shard(shardIndex)); + } + + for (int value = 0; value < Values; ++value) { + EXPECT_EQ(values.contains(value), set.contains(value)); + EXPECT_EQ(values.count(value), set.count(value)); + } + }; + + std::mt19937_64 rng(42); + + for (int iteration = 0; iteration < Iterations; ++iteration) { + if (rng() % 100 == 0) { + set.clear(); + values.clear(); + checkEverything(); + } + + int value = rng() % Values; + if (rng() % 2 == 0) { + set.insert(value); + values.insert(value); + } else { + set.erase(value); + values.erase(value); + } + + checkEverything(); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/containers/unittests/ya.make b/library/cpp/yt/containers/unittests/ya.make new file mode 100644 index 0000000000..3e7cfd4311 --- /dev/null +++ b/library/cpp/yt/containers/unittests/ya.make @@ -0,0 +1,15 @@ +GTEST(unittester-containers) + +INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc) + +SRCS( + sharded_set_ut.cpp +) + +PEERDIR( + library/cpp/yt/containers + + library/cpp/testing/gtest +) + +END() |