diff options
author | v-mak33v <v-mak33v@yandex-team.ru> | 2022-02-10 16:50:50 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:50 +0300 |
commit | b563943b467b077146b7f7dab9d0d31f786d1515 (patch) | |
tree | df20336199dea4b560e7cfcca2dd92a28733b013 /library/cpp/hyperloglog | |
parent | ee3a91df74e77a27f869bc97a6a87b08313107be (diff) | |
download | ydb-b563943b467b077146b7f7dab9d0d31f786d1515.tar.gz |
Restoring authorship annotation for <v-mak33v@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/hyperloglog')
-rw-r--r-- | library/cpp/hyperloglog/hyperloglog.cpp | 32 | ||||
-rw-r--r-- | library/cpp/hyperloglog/hyperloglog.h | 80 |
2 files changed, 56 insertions, 56 deletions
diff --git a/library/cpp/hyperloglog/hyperloglog.cpp b/library/cpp/hyperloglog/hyperloglog.cpp index ec8352abe1..99ee73972f 100644 --- a/library/cpp/hyperloglog/hyperloglog.cpp +++ b/library/cpp/hyperloglog/hyperloglog.cpp @@ -87,12 +87,12 @@ namespace { } } - double RawEstimate(const ui8* counts, size_t size) { + double RawEstimate(const ui8* counts, size_t size) { double sum = {}; - for (size_t i = 0; i < size; ++i) { - sum += std::pow(2.0, -counts[i]); + for (size_t i = 0; i < size; ++i) { + sum += std::pow(2.0, -counts[i]); } - return EmpiricAlpha(size) * size * size / sum; + return EmpiricAlpha(size) * size * size / sum; } double LinearCounting(size_t registers, size_t zeroed) { @@ -100,38 +100,38 @@ namespace { } } -THyperLogLogBase::THyperLogLogBase(unsigned precision) - : Precision(precision) { +THyperLogLogBase::THyperLogLogBase(unsigned precision) + : Precision(precision) { Y_ENSURE(precision >= PRECISION_MIN && precision <= PRECISION_MAX); } -void THyperLogLogBase::Update(ui64 hash) { +void THyperLogLogBase::Update(ui64 hash) { const unsigned subHashBits = 8 * sizeof(hash) - Precision; const auto subHash = hash & MaskLowerBits(subHashBits); const auto leadingZeroes = subHash ? (subHashBits - GetValueBitCount(subHash)) : subHashBits; const ui8 weight = static_cast<ui8>(leadingZeroes + 1); const size_t reg = static_cast<size_t>(hash >> subHashBits); - RegistersRef[reg] = std::max(RegistersRef[reg], weight); + RegistersRef[reg] = std::max(RegistersRef[reg], weight); } -void THyperLogLogBase::Merge(const THyperLogLogBase& rh) { +void THyperLogLogBase::Merge(const THyperLogLogBase& rh) { Y_ENSURE(Precision == rh.Precision); - std::transform(RegistersRef.begin(), RegistersRef.end(), rh.RegistersRef.begin(), RegistersRef.begin(), [](ui8 l, ui8 r) { return std::max(l, r); }); + std::transform(RegistersRef.begin(), RegistersRef.end(), rh.RegistersRef.begin(), RegistersRef.begin(), [](ui8 l, ui8 r) { return std::max(l, r); }); } -ui64 THyperLogLogBase::Estimate() const { - const auto m = RegistersRef.size(); - const auto e = RawEstimate(RegistersRef.data(), m); +ui64 THyperLogLogBase::Estimate() const { + const auto m = RegistersRef.size(); + const auto e = RawEstimate(RegistersRef.data(), m); const auto e_ = e <= 5 * m ? (e - EstimateBias(e, Precision)) : e; - const auto v = std::count(RegistersRef.begin(), RegistersRef.end(), ui8(0)); + const auto v = std::count(RegistersRef.begin(), RegistersRef.end(), ui8(0)); const auto h = v != 0 ? LinearCounting(m, v) : e_; return h <= GetThreshold(Precision) ? h : e_; } -void THyperLogLogBase::Save(IOutputStream& out) const { +void THyperLogLogBase::Save(IOutputStream& out) const { out.Write(static_cast<char>(Precision)); - out.Write(RegistersRef.data(), RegistersRef.size() * sizeof(RegistersRef.front())); + out.Write(RegistersRef.data(), RegistersRef.size() * sizeof(RegistersRef.front())); } diff --git a/library/cpp/hyperloglog/hyperloglog.h b/library/cpp/hyperloglog/hyperloglog.h index e79ee0ed77..2b9725dc6e 100644 --- a/library/cpp/hyperloglog/hyperloglog.h +++ b/library/cpp/hyperloglog/hyperloglog.h @@ -1,64 +1,64 @@ #pragma once #include <util/system/types.h> -#include <util/stream/input.h> -#include <util/generic/array_ref.h> +#include <util/stream/input.h> +#include <util/generic/array_ref.h> #include <vector> class IOutputStream; -class THyperLogLogBase { -protected: - explicit THyperLogLogBase(unsigned precision); +class THyperLogLogBase { +protected: + explicit THyperLogLogBase(unsigned precision); public: static const constexpr unsigned PRECISION_MIN = 4; - + static const constexpr unsigned PRECISION_MAX = 18; void Update(ui64 hash); - void Merge(const THyperLogLogBase& rh); + void Merge(const THyperLogLogBase& rh); ui64 Estimate() const; void Save(IOutputStream& out) const; -protected: - unsigned Precision; - - TArrayRef<ui8> RegistersRef; -}; - -template <typename Alloc> -class THyperLogLogWithAlloc : public THyperLogLogBase { -private: - explicit THyperLogLogWithAlloc(unsigned precision) - : THyperLogLogBase(precision) { - Registers.resize(1u << precision); - RegistersRef = MakeArrayRef(Registers); - } - -public: - THyperLogLogWithAlloc(THyperLogLogWithAlloc&&) = default; - - THyperLogLogWithAlloc& operator=(THyperLogLogWithAlloc&&) = default; - - static THyperLogLogWithAlloc Create(unsigned precision) { - return THyperLogLogWithAlloc(precision); - } - - static THyperLogLogWithAlloc Load(IInputStream& in) { - char precision = {}; - Y_ENSURE(in.ReadChar(precision)); - auto res = Create(precision); - in.LoadOrFail(res.Registers.data(), res.Registers.size() * sizeof(res.Registers.front())); - return res; +protected: + unsigned Precision; + + TArrayRef<ui8> RegistersRef; +}; + +template <typename Alloc> +class THyperLogLogWithAlloc : public THyperLogLogBase { +private: + explicit THyperLogLogWithAlloc(unsigned precision) + : THyperLogLogBase(precision) { + Registers.resize(1u << precision); + RegistersRef = MakeArrayRef(Registers); } +public: + THyperLogLogWithAlloc(THyperLogLogWithAlloc&&) = default; + + THyperLogLogWithAlloc& operator=(THyperLogLogWithAlloc&&) = default; + + static THyperLogLogWithAlloc Create(unsigned precision) { + return THyperLogLogWithAlloc(precision); + } + + static THyperLogLogWithAlloc Load(IInputStream& in) { + char precision = {}; + Y_ENSURE(in.ReadChar(precision)); + auto res = Create(precision); + in.LoadOrFail(res.Registers.data(), res.Registers.size() * sizeof(res.Registers.front())); + return res; + } + private: - std::vector<ui8, Alloc> Registers; + std::vector<ui8, Alloc> Registers; }; - -using THyperLogLog = THyperLogLogWithAlloc<std::allocator<ui8>>; + +using THyperLogLog = THyperLogLogWithAlloc<std::allocator<ui8>>; |