1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
#include <yt/yt/core/test_framework/framework.h>
#include <yt/yt/core/misc/hyperloglog.h>
#include <yt/yt/core/misc/random.h>
#include <library/cpp/yt/farmhash/farm_hash.h>
namespace NYT {
namespace {
////////////////////////////////////////////////////////////////////////////////
class THyperLogLogTest
: public ::testing::Test
, public ::testing::WithParamInterface<std::tuple<int, int, int, int>>
{ };
std::pair<THyperLogLog<8>, int> GenerateHyperLogLog(
TRandomGenerator& rng,
int size,
int targetCardinality)
{
auto hll = THyperLogLog<8>();
int cardinality = 1;
ui64 n = 0;
hll.Add(FarmFingerprint(n));
for (int i = 0; i < size; i++) {
if (static_cast<int>(rng.Generate<ui64>() % size) < targetCardinality) {
cardinality++;
n += 1 + (rng.Generate<ui32>() % 100);
}
hll.Add(FarmFingerprint(n));
}
return std::pair(hll, cardinality);
}
TEST_P(THyperLogLogTest, Random)
{
auto param = GetParam();
auto seed = std::get<0>(param);
auto size = std::get<1>(param);
auto targetCardinality = std::get<2>(param);
auto iterations = std::get<3>(param);
auto rng = TRandomGenerator(seed);
auto error = 0.0;
for (int i = 0; i < iterations; i++) {
auto hll = GenerateHyperLogLog(
rng,
size,
targetCardinality);
auto err = ((double)hll.first.EstimateCardinality() - hll.second) / hll.second;
error += err;
}
auto meanError = error / iterations;
EXPECT_NEAR(meanError, 0, 0.05);
}
INSTANTIATE_TEST_SUITE_P(
HyperLogLogTest,
THyperLogLogTest,
::testing::Values(
std::tuple<int, int, int, int>(123, 100000, 200, 10),
std::tuple<int, int, int, int>(324, 100000, 1000, 10),
std::tuple<int, int, int, int>(653, 100000, 5000, 10),
std::tuple<int, int, int, int>(890, 100000, 10000, 10),
std::tuple<int, int, int, int>(278, 100000, 50000, 10)));
////////////////////////////////////////////////////////////////////////////////
} // namespace
} // namespace NYT
|