about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/codecs/sample.h
diff options
context:
space:
mode:
author Ruslan Kovalev <ruslan.a.kovalev@gmail.com> 2022-02-10 16:46:45 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:45 +0300
commit 9123176b341b6f2658cff5132482b8237c1416c8 (patch)
tree 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/codecs/sample.h
parent 59e19371de37995fcb36beb16cd6ec030af960bc (diff)
download ydb-9123176b341b6f2658cff5132482b8237c1416c8.tar.gz
Restoring authorship annotation for Ruslan Kovalev <ruslan.a.kovalev@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/codecs/sample.h')
-rw-r--r-- library/cpp/codecs/sample.h 156
1 files changed, 78 insertions, 78 deletions
diff --git a/library/cpp/codecs/sample.h b/library/cpp/codecs/sample.h
index bce37e6a2c..15f03afcc5 100644
--- a/library/cpp/codecs/sample.h
+++ b/library/cpp/codecs/sample.h
@@ -1,89 +1,89 @@
-#pragma once
-
+#pragma once
+
#include <library/cpp/deprecated/accessors/accessors.h>
-
-#include <util/generic/buffer.h>
-#include <util/generic/vector.h>
-#include <util/random/fast.h>
-#include <util/random/shuffle.h>
-
-#include <functional>
-#include <type_traits>
-
-namespace NCodecs {
- class ISequenceReader {
- public:
- virtual bool NextRegion(TStringBuf& s) = 0;
-
- virtual ~ISequenceReader() = default;
- };
-
- template <class TValue>
- TStringBuf ValueToStringBuf(TValue&& t) {
- return TStringBuf{NAccessors::Begin(t), NAccessors::End(t)};
- }
-
- template <class TIter>
+
+#include <util/generic/buffer.h>
+#include <util/generic/vector.h>
+#include <util/random/fast.h>
+#include <util/random/shuffle.h>
+
+#include <functional>
+#include <type_traits>
+
+namespace NCodecs {
+ class ISequenceReader {
+ public:
+ virtual bool NextRegion(TStringBuf& s) = 0;
+
+ virtual ~ISequenceReader() = default;
+ };
+
+ template <class TValue>
+ TStringBuf ValueToStringBuf(TValue&& t) {
+ return TStringBuf{NAccessors::Begin(t), NAccessors::End(t)};
+ }
+
+ template <class TIter>
TStringBuf IterToStringBuf(TIter iter) {
- return ValueToStringBuf(*iter);
- }
-
- template <class TItem>
+ return ValueToStringBuf(*iter);
+ }
+
+ template <class TItem>
class TSimpleSequenceReader: public ISequenceReader {
const TVector<TItem>& Items;
- size_t Idx = 0;
-
- public:
+ size_t Idx = 0;
+
+ public:
TSimpleSequenceReader(const TVector<TItem>& items)
- : Items(items)
+ : Items(items)
{
}
-
- bool NextRegion(TStringBuf& s) override {
- if (Idx >= Items.size()) {
- return false;
- }
-
- s = ValueToStringBuf(Items[Idx++]);
- return true;
- }
- };
-
- template <class TIter, class TGetter>
- size_t GetInputSize(TIter begin, TIter end, TGetter getter) {
- size_t totalBytes = 0;
- for (TIter iter = begin; iter != end; ++iter) {
- totalBytes += getter(iter).size();
- }
- return totalBytes;
- }
-
- template <class TIter>
- size_t GetInputSize(TIter begin, TIter end) {
- return GetInputSize(begin, end, IterToStringBuf<TIter>);
- }
-
- template <class TIter, class TGetter>
+
+ bool NextRegion(TStringBuf& s) override {
+ if (Idx >= Items.size()) {
+ return false;
+ }
+
+ s = ValueToStringBuf(Items[Idx++]);
+ return true;
+ }
+ };
+
+ template <class TIter, class TGetter>
+ size_t GetInputSize(TIter begin, TIter end, TGetter getter) {
+ size_t totalBytes = 0;
+ for (TIter iter = begin; iter != end; ++iter) {
+ totalBytes += getter(iter).size();
+ }
+ return totalBytes;
+ }
+
+ template <class TIter>
+ size_t GetInputSize(TIter begin, TIter end) {
+ return GetInputSize(begin, end, IterToStringBuf<TIter>);
+ }
+
+ template <class TIter, class TGetter>
TVector<TBuffer> GetSample(TIter begin, TIter end, size_t sampleSizeBytes, TGetter getter) {
- TFastRng64 rng{0x1ce1f2e507541a05, 0x07d45659, 0x7b8771030dd9917e, 0x2d6636ce};
-
- size_t totalBytes = GetInputSize(begin, end, getter);
- double sampleProb = (double)sampleSizeBytes / Max<size_t>(1, totalBytes);
-
+ TFastRng64 rng{0x1ce1f2e507541a05, 0x07d45659, 0x7b8771030dd9917e, 0x2d6636ce};
+
+ size_t totalBytes = GetInputSize(begin, end, getter);
+ double sampleProb = (double)sampleSizeBytes / Max<size_t>(1, totalBytes);
+
TVector<TBuffer> result;
- for (TIter iter = begin; iter != end; ++iter) {
- if (sampleProb >= 1 || rng.GenRandReal1() < sampleProb) {
- TStringBuf reg = getter(iter);
+ for (TIter iter = begin; iter != end; ++iter) {
+ if (sampleProb >= 1 || rng.GenRandReal1() < sampleProb) {
+ TStringBuf reg = getter(iter);
result.emplace_back(reg.data(), reg.size());
- }
- }
- Shuffle(result.begin(), result.end(), rng);
- return result;
- }
-
- template <class TIter>
+ }
+ }
+ Shuffle(result.begin(), result.end(), rng);
+ return result;
+ }
+
+ template <class TIter>
TVector<TBuffer> GetSample(TIter begin, TIter end, size_t sampleSizeBytes) {
- return GetSample(begin, end, sampleSizeBytes, IterToStringBuf<TIter>);
- }
-
-}
+ return GetSample(begin, end, sampleSizeBytes, IterToStringBuf<TIter>);
+ }
+
+}