1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
#pragma once
#include <AggregateFunctions/ReservoirSamplerDeterministic.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/** Quantile calculation with "reservoir sample" algorithm.
* It collects pseudorandom subset of limited size from a stream of values,
* and approximate quantile from it.
* The function accept second argument, named "determinator"
* and a hash function from it is calculated and used as a source for randomness
* to apply random sampling.
* The function is deterministic, but care should be taken with choose of "determinator" argument.
*/
template <typename Value>
struct QuantileReservoirSamplerDeterministic
{
using Data = ReservoirSamplerDeterministic<Value, ReservoirSamplerDeterministicOnEmpty::RETURN_NAN_OR_ZERO>;
Data data;
void add(const Value &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add without determinator is not implemented for ReservoirSamplerDeterministic");
}
template <typename Determinator>
void add(const Value & x, const Determinator & determinator)
{
data.insert(x, determinator);
}
void merge(const QuantileReservoirSamplerDeterministic & rhs)
{
data.merge(rhs.data);
}
void serialize(WriteBuffer & buf) const
{
data.write(buf);
}
void deserialize(ReadBuffer & buf)
{
data.read(buf);
}
/// Get the value of the `level` quantile. The level must be between 0 and 1.
Value get(Float64 level)
{
if (data.empty())
return {};
if constexpr (is_decimal<Value>)
return static_cast<typename Value::NativeType>(data.quantileInterpolated(level));
else
return static_cast<Value>(data.quantileInterpolated(level));
}
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
{
bool is_empty = data.empty();
for (size_t i = 0; i < size; ++i)
{
if (is_empty)
{
result[i] = Value{};
}
else
{
if constexpr (is_decimal<Value>)
result[indices[i]] = static_cast<typename Value::NativeType>(data.quantileInterpolated(levels[indices[i]]));
else
result[indices[i]] = static_cast<Value>(data.quantileInterpolated(levels[indices[i]]));
}
}
}
/// The same, but in the case of an empty state, NaN is returned.
Float64 getFloat(Float64 level)
{
return data.quantileInterpolated(level);
}
void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result)
{
for (size_t i = 0; i < size; ++i)
result[indices[i]] = data.quantileInterpolated(levels[indices[i]]);
}
};
}
|