aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/hyperloglog
diff options
context:
space:
mode:
authorv-mak33v <v-mak33v@yandex-team.ru>2022-02-10 16:50:50 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:50 +0300
commitb563943b467b077146b7f7dab9d0d31f786d1515 (patch)
treedf20336199dea4b560e7cfcca2dd92a28733b013 /library/cpp/hyperloglog
parentee3a91df74e77a27f869bc97a6a87b08313107be (diff)
downloadydb-b563943b467b077146b7f7dab9d0d31f786d1515.tar.gz
Restoring authorship annotation for <v-mak33v@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/hyperloglog')
-rw-r--r--library/cpp/hyperloglog/hyperloglog.cpp32
-rw-r--r--library/cpp/hyperloglog/hyperloglog.h80
2 files changed, 56 insertions, 56 deletions
diff --git a/library/cpp/hyperloglog/hyperloglog.cpp b/library/cpp/hyperloglog/hyperloglog.cpp
index ec8352abe1..99ee73972f 100644
--- a/library/cpp/hyperloglog/hyperloglog.cpp
+++ b/library/cpp/hyperloglog/hyperloglog.cpp
@@ -87,12 +87,12 @@ namespace {
}
}
- double RawEstimate(const ui8* counts, size_t size) {
+ double RawEstimate(const ui8* counts, size_t size) {
double sum = {};
- for (size_t i = 0; i < size; ++i) {
- sum += std::pow(2.0, -counts[i]);
+ for (size_t i = 0; i < size; ++i) {
+ sum += std::pow(2.0, -counts[i]);
}
- return EmpiricAlpha(size) * size * size / sum;
+ return EmpiricAlpha(size) * size * size / sum;
}
double LinearCounting(size_t registers, size_t zeroed) {
@@ -100,38 +100,38 @@ namespace {
}
}
-THyperLogLogBase::THyperLogLogBase(unsigned precision)
- : Precision(precision) {
+THyperLogLogBase::THyperLogLogBase(unsigned precision)
+ : Precision(precision) {
Y_ENSURE(precision >= PRECISION_MIN && precision <= PRECISION_MAX);
}
-void THyperLogLogBase::Update(ui64 hash) {
+void THyperLogLogBase::Update(ui64 hash) {
const unsigned subHashBits = 8 * sizeof(hash) - Precision;
const auto subHash = hash & MaskLowerBits(subHashBits);
const auto leadingZeroes = subHash ? (subHashBits - GetValueBitCount(subHash)) : subHashBits;
const ui8 weight = static_cast<ui8>(leadingZeroes + 1);
const size_t reg = static_cast<size_t>(hash >> subHashBits);
- RegistersRef[reg] = std::max(RegistersRef[reg], weight);
+ RegistersRef[reg] = std::max(RegistersRef[reg], weight);
}
-void THyperLogLogBase::Merge(const THyperLogLogBase& rh) {
+void THyperLogLogBase::Merge(const THyperLogLogBase& rh) {
Y_ENSURE(Precision == rh.Precision);
- std::transform(RegistersRef.begin(), RegistersRef.end(), rh.RegistersRef.begin(), RegistersRef.begin(), [](ui8 l, ui8 r) { return std::max(l, r); });
+ std::transform(RegistersRef.begin(), RegistersRef.end(), rh.RegistersRef.begin(), RegistersRef.begin(), [](ui8 l, ui8 r) { return std::max(l, r); });
}
-ui64 THyperLogLogBase::Estimate() const {
- const auto m = RegistersRef.size();
- const auto e = RawEstimate(RegistersRef.data(), m);
+ui64 THyperLogLogBase::Estimate() const {
+ const auto m = RegistersRef.size();
+ const auto e = RawEstimate(RegistersRef.data(), m);
const auto e_ = e <= 5 * m ? (e - EstimateBias(e, Precision)) : e;
- const auto v = std::count(RegistersRef.begin(), RegistersRef.end(), ui8(0));
+ const auto v = std::count(RegistersRef.begin(), RegistersRef.end(), ui8(0));
const auto h = v != 0 ? LinearCounting(m, v) : e_;
return h <= GetThreshold(Precision) ? h : e_;
}
-void THyperLogLogBase::Save(IOutputStream& out) const {
+void THyperLogLogBase::Save(IOutputStream& out) const {
out.Write(static_cast<char>(Precision));
- out.Write(RegistersRef.data(), RegistersRef.size() * sizeof(RegistersRef.front()));
+ out.Write(RegistersRef.data(), RegistersRef.size() * sizeof(RegistersRef.front()));
}
diff --git a/library/cpp/hyperloglog/hyperloglog.h b/library/cpp/hyperloglog/hyperloglog.h
index e79ee0ed77..2b9725dc6e 100644
--- a/library/cpp/hyperloglog/hyperloglog.h
+++ b/library/cpp/hyperloglog/hyperloglog.h
@@ -1,64 +1,64 @@
#pragma once
#include <util/system/types.h>
-#include <util/stream/input.h>
-#include <util/generic/array_ref.h>
+#include <util/stream/input.h>
+#include <util/generic/array_ref.h>
#include <vector>
class IOutputStream;
-class THyperLogLogBase {
-protected:
- explicit THyperLogLogBase(unsigned precision);
+class THyperLogLogBase {
+protected:
+ explicit THyperLogLogBase(unsigned precision);
public:
static const constexpr unsigned PRECISION_MIN = 4;
-
+
static const constexpr unsigned PRECISION_MAX = 18;
void Update(ui64 hash);
- void Merge(const THyperLogLogBase& rh);
+ void Merge(const THyperLogLogBase& rh);
ui64 Estimate() const;
void Save(IOutputStream& out) const;
-protected:
- unsigned Precision;
-
- TArrayRef<ui8> RegistersRef;
-};
-
-template <typename Alloc>
-class THyperLogLogWithAlloc : public THyperLogLogBase {
-private:
- explicit THyperLogLogWithAlloc(unsigned precision)
- : THyperLogLogBase(precision) {
- Registers.resize(1u << precision);
- RegistersRef = MakeArrayRef(Registers);
- }
-
-public:
- THyperLogLogWithAlloc(THyperLogLogWithAlloc&&) = default;
-
- THyperLogLogWithAlloc& operator=(THyperLogLogWithAlloc&&) = default;
-
- static THyperLogLogWithAlloc Create(unsigned precision) {
- return THyperLogLogWithAlloc(precision);
- }
-
- static THyperLogLogWithAlloc Load(IInputStream& in) {
- char precision = {};
- Y_ENSURE(in.ReadChar(precision));
- auto res = Create(precision);
- in.LoadOrFail(res.Registers.data(), res.Registers.size() * sizeof(res.Registers.front()));
- return res;
+protected:
+ unsigned Precision;
+
+ TArrayRef<ui8> RegistersRef;
+};
+
+template <typename Alloc>
+class THyperLogLogWithAlloc : public THyperLogLogBase {
+private:
+ explicit THyperLogLogWithAlloc(unsigned precision)
+ : THyperLogLogBase(precision) {
+ Registers.resize(1u << precision);
+ RegistersRef = MakeArrayRef(Registers);
}
+public:
+ THyperLogLogWithAlloc(THyperLogLogWithAlloc&&) = default;
+
+ THyperLogLogWithAlloc& operator=(THyperLogLogWithAlloc&&) = default;
+
+ static THyperLogLogWithAlloc Create(unsigned precision) {
+ return THyperLogLogWithAlloc(precision);
+ }
+
+ static THyperLogLogWithAlloc Load(IInputStream& in) {
+ char precision = {};
+ Y_ENSURE(in.ReadChar(precision));
+ auto res = Create(precision);
+ in.LoadOrFail(res.Registers.data(), res.Registers.size() * sizeof(res.Registers.front()));
+ return res;
+ }
+
private:
- std::vector<ui8, Alloc> Registers;
+ std::vector<ui8, Alloc> Registers;
};
-
-using THyperLogLog = THyperLogLogWithAlloc<std::allocator<ui8>>;
+
+using THyperLogLog = THyperLogLogWithAlloc<std::allocator<ui8>>;