diff options
author | Ruslan Kovalev <ruslan.a.kovalev@gmail.com> | 2022-02-10 16:46:45 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:45 +0300 |
commit | 9123176b341b6f2658cff5132482b8237c1416c8 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/codecs/sample.h | |
parent | 59e19371de37995fcb36beb16cd6ec030af960bc (diff) | |
download | ydb-9123176b341b6f2658cff5132482b8237c1416c8.tar.gz |
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/codecs/sample.h')
-rw-r--r-- | library/cpp/codecs/sample.h | 156 |
1 files changed, 78 insertions, 78 deletions
diff --git a/library/cpp/codecs/sample.h b/library/cpp/codecs/sample.h index bce37e6a2c..15f03afcc5 100644 --- a/library/cpp/codecs/sample.h +++ b/library/cpp/codecs/sample.h @@ -1,89 +1,89 @@ -#pragma once - +#pragma once + #include <library/cpp/deprecated/accessors/accessors.h> - -#include <util/generic/buffer.h> -#include <util/generic/vector.h> -#include <util/random/fast.h> -#include <util/random/shuffle.h> - -#include <functional> -#include <type_traits> - -namespace NCodecs { - class ISequenceReader { - public: - virtual bool NextRegion(TStringBuf& s) = 0; - - virtual ~ISequenceReader() = default; - }; - - template <class TValue> - TStringBuf ValueToStringBuf(TValue&& t) { - return TStringBuf{NAccessors::Begin(t), NAccessors::End(t)}; - } - - template <class TIter> + +#include <util/generic/buffer.h> +#include <util/generic/vector.h> +#include <util/random/fast.h> +#include <util/random/shuffle.h> + +#include <functional> +#include <type_traits> + +namespace NCodecs { + class ISequenceReader { + public: + virtual bool NextRegion(TStringBuf& s) = 0; + + virtual ~ISequenceReader() = default; + }; + + template <class TValue> + TStringBuf ValueToStringBuf(TValue&& t) { + return TStringBuf{NAccessors::Begin(t), NAccessors::End(t)}; + } + + template <class TIter> TStringBuf IterToStringBuf(TIter iter) { - return ValueToStringBuf(*iter); - } - - template <class TItem> + return ValueToStringBuf(*iter); + } + + template <class TItem> class TSimpleSequenceReader: public ISequenceReader { const TVector<TItem>& Items; - size_t Idx = 0; - - public: + size_t Idx = 0; + + public: TSimpleSequenceReader(const TVector<TItem>& items) - : Items(items) + : Items(items) { } - - bool NextRegion(TStringBuf& s) override { - if (Idx >= Items.size()) { - return false; - } - - s = ValueToStringBuf(Items[Idx++]); - return true; - } - }; - - template <class TIter, class TGetter> - size_t GetInputSize(TIter begin, TIter end, TGetter getter) { - size_t totalBytes = 0; - for (TIter iter = begin; iter != end; ++iter) { - totalBytes += getter(iter).size(); - } - return totalBytes; - } - - template <class TIter> - size_t GetInputSize(TIter begin, TIter end) { - return GetInputSize(begin, end, IterToStringBuf<TIter>); - } - - template <class TIter, class TGetter> + + bool NextRegion(TStringBuf& s) override { + if (Idx >= Items.size()) { + return false; + } + + s = ValueToStringBuf(Items[Idx++]); + return true; + } + }; + + template <class TIter, class TGetter> + size_t GetInputSize(TIter begin, TIter end, TGetter getter) { + size_t totalBytes = 0; + for (TIter iter = begin; iter != end; ++iter) { + totalBytes += getter(iter).size(); + } + return totalBytes; + } + + template <class TIter> + size_t GetInputSize(TIter begin, TIter end) { + return GetInputSize(begin, end, IterToStringBuf<TIter>); + } + + template <class TIter, class TGetter> TVector<TBuffer> GetSample(TIter begin, TIter end, size_t sampleSizeBytes, TGetter getter) { - TFastRng64 rng{0x1ce1f2e507541a05, 0x07d45659, 0x7b8771030dd9917e, 0x2d6636ce}; - - size_t totalBytes = GetInputSize(begin, end, getter); - double sampleProb = (double)sampleSizeBytes / Max<size_t>(1, totalBytes); - + TFastRng64 rng{0x1ce1f2e507541a05, 0x07d45659, 0x7b8771030dd9917e, 0x2d6636ce}; + + size_t totalBytes = GetInputSize(begin, end, getter); + double sampleProb = (double)sampleSizeBytes / Max<size_t>(1, totalBytes); + TVector<TBuffer> result; - for (TIter iter = begin; iter != end; ++iter) { - if (sampleProb >= 1 || rng.GenRandReal1() < sampleProb) { - TStringBuf reg = getter(iter); + for (TIter iter = begin; iter != end; ++iter) { + if (sampleProb >= 1 || rng.GenRandReal1() < sampleProb) { + TStringBuf reg = getter(iter); result.emplace_back(reg.data(), reg.size()); - } - } - Shuffle(result.begin(), result.end(), rng); - return result; - } - - template <class TIter> + } + } + Shuffle(result.begin(), result.end(), rng); + return result; + } + + template <class TIter> TVector<TBuffer> GetSample(TIter begin, TIter end, size_t sampleSizeBytes) { - return GetSample(begin, end, sampleSizeBytes, IterToStringBuf<TIter>); - } - -} + return GetSample(begin, end, sampleSizeBytes, IterToStringBuf<TIter>); + } + +} |