summaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs/common/histogram/histogram_udf.cpp
diff options
context:
space:
mode:
authorvvvv <[email protected]>2025-10-06 13:26:25 +0300
committervvvv <[email protected]>2025-10-06 14:06:25 +0300
commiteca8ce9cb1613d5c983185c4e43c20651a9638aa (patch)
tree61ee5ae779948e61af9a7691d19eaa2c09869121 /yql/essentials/udfs/common/histogram/histogram_udf.cpp
parent4adf7eecae16a9b228b28cc5f64c27ef69ad5ec2 (diff)
YQL-20086 udfs
init commit_hash:f9684778bf1ea956965f2360b80b91edb7d4ffbe
Diffstat (limited to 'yql/essentials/udfs/common/histogram/histogram_udf.cpp')
-rw-r--r--yql/essentials/udfs/common/histogram/histogram_udf.cpp1529
1 files changed, 768 insertions, 761 deletions
diff --git a/yql/essentials/udfs/common/histogram/histogram_udf.cpp b/yql/essentials/udfs/common/histogram/histogram_udf.cpp
index 731b5956ed8..283e243396b 100644
--- a/yql/essentials/udfs/common/histogram/histogram_udf.cpp
+++ b/yql/essentials/udfs/common/histogram/histogram_udf.cpp
@@ -43,461 +43,220 @@ namespace {
XX(Merge, arg)
#define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." #name;
- HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME)
- DECLARE_HISTOGRAM_RESOURCE_NAME(Linear)
- DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic)
-
- class TLinearHistogram: public TAdaptiveWardHistogram {
- public:
- TLinearHistogram(double step, double begin, double end)
- : TAdaptiveWardHistogram(1ULL << 24)
- , Step_(step)
- , Begin_(begin)
- , End_(end)
- {
- }
-
- void Add(double value, double weight) override {
- if (value < Begin_) {
- value = Begin_;
- } else if (value > End_) {
- value = End_;
- } else {
- value = std::floor(value / Step_ + 0.5) * Step_;
- }
- TAdaptiveWardHistogram::Add(value, weight);
- }
-
- void Add(const THistoRec&) override {
- Y_ABORT("Not implemented");
- }
-
- protected:
- double Step_;
- double Begin_;
- double End_;
- };
-
- class TLogarithmicHistogram: public TLinearHistogram {
- public:
- TLogarithmicHistogram(double step, double begin, double end)
- : TLinearHistogram(step, begin, end)
- {
- }
-
- void Add(double value, double weight) override {
- double base = std::log(value) / std::log(Step_);
- double prev = std::pow(Step_, std::floor(base));
- double next = std::pow(Step_, std::ceil(base));
- if (std::abs(value - next) > std::abs(value - prev)) {
- value = prev;
- } else {
- value = next;
- }
-
- if (value < Begin_) {
- value = Begin_;
- } else if (value > End_) {
- value = End_;
- }
-
- if (!std::isnan(value)) {
- TAdaptiveWardHistogram::Add(value, weight);
- }
- }
-
- void Add(const THistoRec&) override {
- Y_ABORT("Not implemented");
- }
- };
-
- template <typename THistogramType, const char* ResourceName>
- class THistogram_Create: public TBoxedValue {
- public:
- THistogram_Create(TSourcePosition pos)
- : Pos_(pos)
- {}
-
- typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
-
- static const TStringRef& Name() {
- static auto name = TString(ResourceName).substr(10) + "Histogram_Create";
- static auto nameRef = TStringRef(name);
- return nameRef;
- }
-
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- try {
- Y_UNUSED(valueBuilder);
- THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>()));
- histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>());
- return TUnboxedValuePod(histogram.Release());
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
- }
- }
+HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME)
+DECLARE_HISTOGRAM_RESOURCE_NAME(Linear)
+DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic)
+
+class TLinearHistogram: public TAdaptiveWardHistogram {
+public:
+ TLinearHistogram(double step, double begin, double end)
+ : TAdaptiveWardHistogram(1ULL << 24)
+ , Step_(step)
+ , Begin_(begin)
+ , End_(end)
+ {
+ }
- public:
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>();
- if (!typesOnly) {
- builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition()));
- }
- return true;
- } else {
- return false;
- }
+ void Add(double value, double weight) override {
+ if (value < Begin_) {
+ value = Begin_;
+ } else if (value > End_) {
+ value = End_;
+ } else {
+ value = std::floor(value / Step_ + 0.5) * Step_;
}
+ TAdaptiveWardHistogram::Add(value, weight);
+ }
- private:
- TSourcePosition Pos_;
- };
+ void Add(const THistoRec&) override {
+ Y_ABORT("Not implemented");
+ }
- template <typename THistogramType, const char* ResourceName>
- class THistogram_AddValue: public TBoxedValue {
- public:
- THistogram_AddValue(TSourcePosition pos)
- : Pos_(pos)
- {}
-
- typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
-
- static const TStringRef& Name() {
- static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue";
- static auto nameRef = TStringRef(name);
- return nameRef;
- }
-
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- try {
- Y_UNUSED(valueBuilder);
- THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get());
- resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>());
- return TUnboxedValuePod(args[0]);
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
- }
- }
+protected:
+ double Step_;
+ double Begin_;
+ double End_;
+};
+
+class TLogarithmicHistogram: public TLinearHistogram {
+public:
+ TLogarithmicHistogram(double step, double begin, double end)
+ : TLinearHistogram(step, begin, end)
+ {
+ }
- public:
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>();
- if (!typesOnly) {
- builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition()));
- }
- return true;
- } else {
- return false;
- }
+ void Add(double value, double weight) override {
+ double base = std::log(value) / std::log(Step_);
+ double prev = std::pow(Step_, std::floor(base));
+ double next = std::pow(Step_, std::ceil(base));
+ if (std::abs(value - next) > std::abs(value - prev)) {
+ value = prev;
+ } else {
+ value = next;
}
- private:
- TSourcePosition Pos_;
- };
-
- template <typename THistogramType, const char* ResourceName>
- class THistogram_Serialize: public TBoxedValue {
- public:
- THistogram_Serialize(TSourcePosition pos)
- : Pos_(pos)
- {}
-
- typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
-
- static const TStringRef& Name() {
- static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize";
- static auto nameRef = TStringRef(name);
- return nameRef;
- }
-
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- try {
- THistogram proto;
- TString result;
- static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto);
- Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result);
- return valueBuilder->NewString(result);
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
- }
+ if (value < Begin_) {
+ value = Begin_;
+ } else if (value > End_) {
+ value = End_;
}
- public:
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- builder.SimpleSignature<char*(TResource<ResourceName>)>();
- if (!typesOnly) {
- builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition()));
- }
- return true;
- } else {
- return false;
- }
+ if (!std::isnan(value)) {
+ TAdaptiveWardHistogram::Add(value, weight);
}
+ }
- private:
- TSourcePosition Pos_;
- };
+ void Add(const THistoRec&) override {
+ Y_ABORT("Not implemented");
+ }
+};
+
+template <typename THistogramType, const char* ResourceName>
+class THistogram_Create: public TBoxedValue {
+public:
+ THistogram_Create(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
- template <typename THistogramType, const char* ResourceName>
- class THistogram_Deserialize: public TBoxedValue {
- public:
- THistogram_Deserialize(TSourcePosition pos)
- : Pos_(pos)
- {}
-
- typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
-
- static const TStringRef& Name() {
- static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize";
- static auto nameRef = TStringRef(name);
- return nameRef;
- }
-
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- try {
- Y_UNUSED(valueBuilder);
- THistogram proto;
- Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
- THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>()));
- histogram->Get()->FromProto(proto);
- return TUnboxedValuePod(histogram.Release());
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
- }
- }
+ typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
- public:
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>();
- if (!typesOnly) {
- builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition()));
- }
- return true;
- } else {
- return false;
- }
- }
-
- private:
- TSourcePosition Pos_;
- };
+ static const TStringRef& Name() {
+ static auto name = TString(ResourceName).substr(10) + "Histogram_Create";
+ static auto nameRef = TStringRef(name);
+ return nameRef;
+ }
- template <typename THistogramType, const char* ResourceName>
- class THistogram_Merge: public TBoxedValue {
- public:
- THistogram_Merge(TSourcePosition pos)
- : Pos_(pos)
- {}
-
- typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
-
- static const TStringRef& Name() {
- static auto name = TString(ResourceName).substr(10) + "Histogram_Merge";
- static auto nameRef = TStringRef(name);
- return nameRef;
- }
-
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- try {
- Y_UNUSED(valueBuilder);
- THistogram proto;
- static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto);
- static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0);
- return TUnboxedValuePod(args[1]);
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
- }
+private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ try {
+ Y_UNUSED(valueBuilder);
+ THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>()));
+ histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>());
+ return TUnboxedValuePod(histogram.Release());
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
}
+ }
- public:
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>();
- if (!typesOnly) {
- builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition()));
- }
- return true;
- } else {
- return false;
+public:
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>();
+ if (!typesOnly) {
+ builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition()));
}
+ return true;
+ } else {
+ return false;
}
+ }
- private:
- TSourcePosition Pos_;
- };
-
- struct THistogramIndexes {
- static constexpr ui32 BinFieldsCount = 2U;
- static constexpr ui32 ResultFieldsCount = 5U;
-
- THistogramIndexes(IFunctionTypeInfoBuilder& builder) {
- const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build();
- const auto binsList = builder.List()->Item(binStructType).Build();
- ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build();
- }
-
- ui32 Kind;
- ui32 Min;
- ui32 Max;
- ui32 WeightsSum;
- ui32 Bins;
-
- ui32 Position;
- ui32 Frequency;
+private:
+ TSourcePosition Pos_;
+};
- TType* ResultStructType;
- };
+template <typename THistogramType, const char* ResourceName>
+class THistogram_AddValue: public TBoxedValue {
+public:
+ THistogram_AddValue(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
- template <typename THistogramType, const char* ResourceName>
- class THistogram_GetResult: public TBoxedValue {
- public:
- typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
+ typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
- THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
- : HistogramIndexes_(histogramIndexes)
- , Pos_(pos)
- {
- }
+ static const TStringRef& Name() {
+ static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue";
+ static auto nameRef = TStringRef(name);
+ return nameRef;
+ }
- static const TStringRef& Name() {
- static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult";
- static auto nameRef = TStringRef(name);
- return nameRef;
+private:
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ try {
+ Y_UNUSED(valueBuilder);
+ THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get());
+ resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>());
+ return TUnboxedValuePod(args[0]);
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
}
+ }
- private:
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- THistogram proto;
- auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get();
- histogram->ToProto(proto);
-
- auto size = proto.FreqSize();
- TUnboxedValue* fields = nullptr;
- auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields);
- fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10));
- if (size) {
- TUnboxedValue* items = nullptr;
- fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items);
- fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue()));
- fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue()));
- fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum()));
- for (ui64 i = 0; i < size; ++i) {
- TUnboxedValue* binFields = nullptr;
- *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields);
- binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i)));
- binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i)));
- }
- } else {
- fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList();
- fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0);
- fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0);
- fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0);
+public:
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>();
+ if (!typesOnly) {
+ builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition()));
}
-
- return result;
+ return true;
+ } else {
+ return false;
}
+ }
- public:
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName)));
+private:
+ TSourcePosition Pos_;
+};
- THistogramIndexes histogramIndexes(builder);
+template <typename THistogramType, const char* ResourceName>
+class THistogram_Serialize: public TBoxedValue {
+public:
+ THistogram_Serialize(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
- builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType);
+ typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
- if (!typesOnly) {
- builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition()));
- }
- return true;
- } else {
- return false;
- }
- }
-
- private:
- const THistogramIndexes HistogramIndexes_;
- TSourcePosition Pos_;
- };
+ static const TStringRef& Name() {
+ static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize";
+ static auto nameRef = TStringRef(name);
+ return nameRef;
+ }
- template <>
- TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run(
+private:
+ TUnboxedValue Run(
const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const {
- using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource;
+ const TUnboxedValuePod* args) const override {
try {
- Y_UNUSED(valueBuilder);
- THolder<THistogramResource> histogram(new THistogramResource(
- args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
- histogram->Get()->Add(args[0].Get<double>(), 1.0);
- return TUnboxedValuePod(histogram.Release());
+ THistogram proto;
+ TString result;
+ static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto);
+ Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result);
+ return valueBuilder->NewString(result);
} catch (const std::exception& e) {
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
}
}
- template <>
- bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature(
+public:
+ static bool DeclareSignature(
const TStringRef& name,
TType* userType,
IFunctionTypeInfoBuilder& builder,
bool typesOnly) {
Y_UNUSED(userType);
if (Name() == name) {
- builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>();
+ builder.SimpleSignature<char*(TResource<ResourceName>)>();
if (!typesOnly) {
- builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition()));
+ builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition()));
}
return true;
} else {
@@ -505,17 +264,35 @@ namespace {
}
}
- template <>
- TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run(
+private:
+ TSourcePosition Pos_;
+};
+
+template <typename THistogramType, const char* ResourceName>
+class THistogram_Deserialize: public TBoxedValue {
+public:
+ THistogram_Deserialize(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
+
+ static const TStringRef& Name() {
+ static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize";
+ static auto nameRef = TStringRef(name);
+ return nameRef;
+ }
+
+private:
+ TUnboxedValue Run(
const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const {
- using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource;
+ const TUnboxedValuePod* args) const override {
try {
Y_UNUSED(valueBuilder);
THistogram proto;
Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
- THolder<THistogramResource> histogram(
- new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+ THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>()));
histogram->Get()->FromProto(proto);
return TUnboxedValuePod(histogram.Release());
} catch (const std::exception& e) {
@@ -523,17 +300,17 @@ namespace {
}
}
- template <>
- bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature(
+public:
+ static bool DeclareSignature(
const TStringRef& name,
TType* userType,
IFunctionTypeInfoBuilder& builder,
bool typesOnly) {
Y_UNUSED(userType);
if (Name() == name) {
- builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>();
+ builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>();
if (!typesOnly) {
- builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition()));
+ builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition()));
}
return true;
} else {
@@ -541,33 +318,52 @@ namespace {
}
}
- template <>
- TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run(
+private:
+ TSourcePosition Pos_;
+};
+
+template <typename THistogramType, const char* ResourceName>
+class THistogram_Merge: public TBoxedValue {
+public:
+ THistogram_Merge(TSourcePosition pos)
+ : Pos_(pos)
+ {
+ }
+
+ typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
+
+ static const TStringRef& Name() {
+ static auto name = TString(ResourceName).substr(10) + "Histogram_Merge";
+ static auto nameRef = TStringRef(name);
+ return nameRef;
+ }
+
+private:
+ TUnboxedValue Run(
const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const {
- using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource;
+ const TUnboxedValuePod* args) const override {
try {
Y_UNUSED(valueBuilder);
- THolder<THistogramResource> histogram(new THistogramResource(
- args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
- histogram->Get()->Add(args[0].Get<double>(), 1.0);
- return TUnboxedValuePod(histogram.Release());
+ THistogram proto;
+ static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto);
+ static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0);
+ return TUnboxedValuePod(args[1]);
} catch (const std::exception& e) {
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
}
}
- template <>
- bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature(
+public:
+ static bool DeclareSignature(
const TStringRef& name,
TType* userType,
IFunctionTypeInfoBuilder& builder,
bool typesOnly) {
Y_UNUSED(userType);
if (Name() == name) {
- builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>();
+ builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>();
if (!typesOnly) {
- builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition()));
+ builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition()));
}
return true;
} else {
@@ -575,35 +371,99 @@ namespace {
}
}
- template <>
- TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run(
+private:
+ TSourcePosition Pos_;
+};
+
+struct THistogramIndexes {
+ static constexpr ui32 BinFieldsCount = 2U;
+ static constexpr ui32 ResultFieldsCount = 5U;
+
+ THistogramIndexes(IFunctionTypeInfoBuilder& builder) {
+ const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build();
+ const auto binsList = builder.List()->Item(binStructType).Build();
+ ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build();
+ }
+
+ ui32 Kind;
+ ui32 Min;
+ ui32 Max;
+ ui32 WeightsSum;
+ ui32 Bins;
+
+ ui32 Position;
+ ui32 Frequency;
+
+ TType* ResultStructType;
+};
+
+template <typename THistogramType, const char* ResourceName>
+class THistogram_GetResult: public TBoxedValue {
+public:
+ typedef TBoxedResource<THistogramType, ResourceName> THistogramResource;
+
+ THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
+ : HistogramIndexes_(histogramIndexes)
+ , Pos_(pos)
+ {
+ }
+
+ static const TStringRef& Name() {
+ static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult";
+ static auto nameRef = TStringRef(name);
+ return nameRef;
+ }
+
+private:
+ TUnboxedValue Run(
const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const {
- using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource;
- try {
- Y_UNUSED(valueBuilder);
- THistogram proto;
- Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
- THolder<THistogramResource> histogram(
- new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
- histogram->Get()->FromProto(proto);
- return TUnboxedValuePod(histogram.Release());
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
+ const TUnboxedValuePod* args) const override {
+ THistogram proto;
+ auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get();
+ histogram->ToProto(proto);
+
+ auto size = proto.FreqSize();
+ TUnboxedValue* fields = nullptr;
+ auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields);
+ fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10));
+ if (size) {
+ TUnboxedValue* items = nullptr;
+ fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items);
+ fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue()));
+ fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue()));
+ fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum()));
+ for (ui64 i = 0; i < size; ++i) {
+ TUnboxedValue* binFields = nullptr;
+ *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields);
+ binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i)));
+ binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i)));
+ }
+ } else {
+ fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList();
+ fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0);
+ fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0);
+ fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0);
}
+
+ return result;
}
- template <>
- bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature(
+public:
+ static bool DeclareSignature(
const TStringRef& name,
TType* userType,
IFunctionTypeInfoBuilder& builder,
bool typesOnly) {
Y_UNUSED(userType);
if (Name() == name) {
- builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>();
+ auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName)));
+
+ THistogramIndexes histogramIndexes(builder);
+
+ builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType);
+
if (!typesOnly) {
- builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition()));
+ builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition()));
}
return true;
} else {
@@ -611,352 +471,499 @@ namespace {
}
}
- class THistogramPrint: public TBoxedValue {
- public:
- THistogramPrint(const THistogramIndexes& histogramIndexes)
- : HistogramIndexes_(histogramIndexes)
- {
+private:
+ const THistogramIndexes HistogramIndexes_;
+ TSourcePosition Pos_;
+};
+
+template <>
+TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const {
+ using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource;
+ try {
+ Y_UNUSED(valueBuilder);
+ THolder<THistogramResource> histogram(new THistogramResource(
+ args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+ histogram->Get()->Add(args[0].Get<double>(), 1.0);
+ return TUnboxedValuePod(histogram.Release());
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
+ }
+}
+
+template <>
+bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>();
+ if (!typesOnly) {
+ builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition()));
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
+
+template <>
+TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const {
+ using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource;
+ try {
+ Y_UNUSED(valueBuilder);
+ THistogram proto;
+ Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
+ THolder<THistogramResource> histogram(
+ new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+ histogram->Get()->FromProto(proto);
+ return TUnboxedValuePod(histogram.Release());
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
+ }
+}
+
+template <>
+bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>();
+ if (!typesOnly) {
+ builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition()));
}
+ return true;
+ } else {
+ return false;
+ }
+}
- static const TStringRef& Name() {
- static auto name = TStringRef::Of("Print");
- return name;
+template <>
+TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const {
+ using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource;
+ try {
+ Y_UNUSED(valueBuilder);
+ THolder<THistogramResource> histogram(new THistogramResource(
+ args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+ histogram->Get()->Add(args[0].Get<double>(), 1.0);
+ return TUnboxedValuePod(histogram.Release());
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
+ }
+}
+
+template <>
+bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>();
+ if (!typesOnly) {
+ builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition()));
}
+ return true;
+ } else {
+ return false;
+ }
+}
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- auto kind = args[0].GetElement(HistogramIndexes_.Kind);
- auto bins = args[0].GetElement(HistogramIndexes_.Bins);
- double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>();
- double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>();
- double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>();
- auto binsIterator = bins.GetListIterator();
+template <>
+TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const {
+ using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource;
+ try {
+ Y_UNUSED(valueBuilder);
+ THistogram proto;
+ Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
+ THolder<THistogramResource> histogram(
+ new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>()));
+ histogram->Get()->FromProto(proto);
+ return TUnboxedValuePod(histogram.Release());
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
+ }
+}
- TStringBuilder result;
- result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' ';
- result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f",
- bins.GetListLength(), weightsSum, min, max);
- double maxFrequency = 0.0;
- size_t maxPositionLength = 0;
- size_t maxFrequencyLength = 0;
- const ui8 bars = args[1].GetOrDefault<ui8>(25);
+template <>
+bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>();
+ if (!typesOnly) {
+ builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition()));
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
- for (TUnboxedValue current; binsIterator.Next(current);) {
- if (bars) {
- double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
- if (frequency > maxFrequency) {
- maxFrequency = frequency;
- }
- }
- size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length();
- size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length();
+class THistogramPrint: public TBoxedValue {
+public:
+ THistogramPrint(const THistogramIndexes& histogramIndexes)
+ : HistogramIndexes_(histogramIndexes)
+ {
+ }
- if (positionLength > maxPositionLength) {
- maxPositionLength = positionLength;
- }
- if (frequencyLength > maxFrequencyLength) {
- maxFrequencyLength = frequencyLength;
- }
- }
+ static const TStringRef& Name() {
+ static auto name = TStringRef::Of("Print");
+ return name;
+ }
- binsIterator = bins.GetListIterator();
- for (TUnboxedValue current; binsIterator.Next(current);) {
- double position = current.GetElement(HistogramIndexes_.Position).Get<double>();
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ auto kind = args[0].GetElement(HistogramIndexes_.Kind);
+ auto bins = args[0].GetElement(HistogramIndexes_.Bins);
+ double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>();
+ double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>();
+ double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>();
+ auto binsIterator = bins.GetListIterator();
+
+ TStringBuilder result;
+ result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' ';
+ result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f",
+ bins.GetListLength(), weightsSum, min, max);
+ double maxFrequency = 0.0;
+ size_t maxPositionLength = 0;
+ size_t maxFrequencyLength = 0;
+ const ui8 bars = args[1].GetOrDefault<ui8>(25);
+
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ if (bars) {
double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
- result << "\n";
- if (bars && maxFrequency > 0) {
- ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency);
- for (ui8 i = 0; i < bars; ++i) {
- if (i < filledBars) {
- result << "█";
- } else {
- result << "░";
- }
- }
+ if (frequency > maxFrequency) {
+ maxFrequency = frequency;
}
- result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength);
- result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength);
}
+ size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length();
+ size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length();
- return valueBuilder->NewString(result);
+ if (positionLength > maxPositionLength) {
+ maxPositionLength = positionLength;
+ }
+ if (frequencyLength > maxFrequencyLength) {
+ maxFrequencyLength = frequencyLength;
+ }
}
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- THistogramIndexes histogramIndexes(builder);
- auto optionalUi8 = builder.Optional()->Item<ui8>().Build();
-
- builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>();
-
- if (!typesOnly) {
- builder.Implementation(new THistogramPrint(histogramIndexes));
+ binsIterator = bins.GetListIterator();
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ double position = current.GetElement(HistogramIndexes_.Position).Get<double>();
+ double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
+ result << "\n";
+ if (bars && maxFrequency > 0) {
+ ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency);
+ for (ui8 i = 0; i < bars; ++i) {
+ if (i < filledBars) {
+ result << "█";
+ } else {
+ result << "░";
+ }
}
- builder.IsStrict();
- return true;
- } else {
- return false;
}
+ result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength);
+ result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength);
}
- private:
- const THistogramIndexes HistogramIndexes_;
- };
+ return valueBuilder->NewString(result);
+ }
- class THistogramToCumulativeDistributionFunction: public TBoxedValue {
- public:
- THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes)
- : HistogramIndexes_(histogramIndexes)
- {
- }
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ THistogramIndexes histogramIndexes(builder);
+ auto optionalUi8 = builder.Optional()->Item<ui8>().Build();
- static const TStringRef& Name() {
- static auto name = TStringRef::Of("ToCumulativeDistributionFunction");
- return name;
- }
+ builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>();
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- TUnboxedValue* fields = nullptr;
- auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields);
- auto bins = args[0].GetElement(HistogramIndexes_.Bins);
- double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>();
- double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>();
- double sum = 0.0;
- double weightsSum = 0.0;
- std::vector<TUnboxedValue> resultBins;
- if (bins.HasFastListLength())
- resultBins.reserve(bins.GetListLength());
- const auto binsIterator = bins.GetListIterator();
- for (TUnboxedValue current; binsIterator.Next(current);) {
- TUnboxedValue* binFields = nullptr;
- auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields);
- const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
- sum += frequency;
- weightsSum += sum;
- binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum);
- binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position);
- resultBins.emplace_back(std::move(resultCurrent));
+ if (!typesOnly) {
+ builder.Implementation(new THistogramPrint(histogramIndexes));
}
-
- auto kind = args[0].GetElement(HistogramIndexes_.Kind);
- fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf");
- fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
- fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue);
- fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue);
- fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum);
- return result;
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
}
+ }
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- THistogramIndexes histogramIndexes(builder);
+private:
+ const THistogramIndexes HistogramIndexes_;
+};
- builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType);
+class THistogramToCumulativeDistributionFunction: public TBoxedValue {
+public:
+ THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes)
+ : HistogramIndexes_(histogramIndexes)
+ {
+ }
- if (!typesOnly) {
- builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes));
- }
- builder.IsStrict();
- return true;
- } else {
- return false;
- }
+ static const TStringRef& Name() {
+ static auto name = TStringRef::Of("ToCumulativeDistributionFunction");
+ return name;
+ }
+
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ TUnboxedValue* fields = nullptr;
+ auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields);
+ auto bins = args[0].GetElement(HistogramIndexes_.Bins);
+ double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>();
+ double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>();
+ double sum = 0.0;
+ double weightsSum = 0.0;
+ std::vector<TUnboxedValue> resultBins;
+ if (bins.HasFastListLength()) {
+ resultBins.reserve(bins.GetListLength());
+ }
+ const auto binsIterator = bins.GetListIterator();
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ TUnboxedValue* binFields = nullptr;
+ auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields);
+ const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
+ sum += frequency;
+ weightsSum += sum;
+ binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum);
+ binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position);
+ resultBins.emplace_back(std::move(resultCurrent));
}
- private:
- const THistogramIndexes HistogramIndexes_;
- };
+ auto kind = args[0].GetElement(HistogramIndexes_.Kind);
+ fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf");
+ fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
+ fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue);
+ fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue);
+ fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum);
+ return result;
+ }
- class THistogramNormalize: public TBoxedValue {
- public:
- THistogramNormalize(const THistogramIndexes& histogramIndexes)
- : HistogramIndexes_(histogramIndexes)
- {
- }
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ THistogramIndexes histogramIndexes(builder);
- static const TStringRef& Name() {
- static auto name = TStringRef::Of("Normalize");
- return name;
- }
+ builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType);
- TUnboxedValue Run(
- const IValueBuilder* valueBuilder,
- const TUnboxedValuePod* args) const override {
- TUnboxedValue* fields = nullptr;
- auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields);
- auto bins = args[0].GetElement(HistogramIndexes_.Bins);
- double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>();
- double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>();
- double area = args[1].GetOrDefault<double>(100.0);
- bool cdfNormalization = args[2].GetOrDefault<bool>(false);
- double sum = 0.0;
- double weightsSum = 0.0;
- double lastBinFrequency = 0.0;
- std::vector<TUnboxedValue> resultBins;
- if (bins.HasFastListLength())
- resultBins.reserve(bins.GetListLength());
- auto binsIterator = bins.GetListIterator();
- for (TUnboxedValue current; binsIterator.Next(current);) {
- sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>();
- lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
- }
- binsIterator = bins.GetListIterator();
- for (TUnboxedValue current; binsIterator.Next(current);) {
- TUnboxedValue* binFields = nullptr;
- auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields);
- double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
- if (cdfNormalization) {
- frequency = area * frequency / lastBinFrequency;
- } else {
- frequency = area * frequency / sum;
- }
- weightsSum += frequency;
- binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency);
- binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position);
- resultBins.emplace_back(std::move(resultCurrent));
+ if (!typesOnly) {
+ builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes));
}
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
+ }
+ }
- TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind);
- if (cdfNormalization) {
- kind = valueBuilder->AppendString(kind, "Cdf");
- }
+private:
+ const THistogramIndexes HistogramIndexes_;
+};
- fields[HistogramIndexes_.Kind] = kind;
- fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
- fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue);
- fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue);
- fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum);
- return result;
- }
-
- static bool DeclareSignature(
- const TStringRef& name,
- TType* userType,
- IFunctionTypeInfoBuilder& builder,
- bool typesOnly) {
- Y_UNUSED(userType);
- if (Name() == name) {
- THistogramIndexes histogramIndexes(builder);
- auto optionalDouble = builder.Optional()->Item<double>().Build();
- auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build();
- builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType);
- builder.OptionalArgs(1);
- builder.OptionalArgs(2);
- if (!typesOnly) {
- builder.Implementation(new THistogramNormalize(histogramIndexes));
- }
- builder.IsStrict();
- return true;
+class THistogramNormalize: public TBoxedValue {
+public:
+ THistogramNormalize(const THistogramIndexes& histogramIndexes)
+ : HistogramIndexes_(histogramIndexes)
+ {
+ }
+
+ static const TStringRef& Name() {
+ static auto name = TStringRef::Of("Normalize");
+ return name;
+ }
+
+ TUnboxedValue Run(
+ const IValueBuilder* valueBuilder,
+ const TUnboxedValuePod* args) const override {
+ TUnboxedValue* fields = nullptr;
+ auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields);
+ auto bins = args[0].GetElement(HistogramIndexes_.Bins);
+ double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>();
+ double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>();
+ double area = args[1].GetOrDefault<double>(100.0);
+ bool cdfNormalization = args[2].GetOrDefault<bool>(false);
+ double sum = 0.0;
+ double weightsSum = 0.0;
+ double lastBinFrequency = 0.0;
+ std::vector<TUnboxedValue> resultBins;
+ if (bins.HasFastListLength()) {
+ resultBins.reserve(bins.GetListLength());
+ }
+ auto binsIterator = bins.GetListIterator();
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>();
+ lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
+ }
+ binsIterator = bins.GetListIterator();
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ TUnboxedValue* binFields = nullptr;
+ auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields);
+ double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>();
+ if (cdfNormalization) {
+ frequency = area * frequency / lastBinFrequency;
} else {
- return false;
+ frequency = area * frequency / sum;
}
+ weightsSum += frequency;
+ binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency);
+ binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position);
+ resultBins.emplace_back(std::move(resultCurrent));
}
- private:
- const THistogramIndexes HistogramIndexes_;
- };
+ TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind);
+ if (cdfNormalization) {
+ kind = valueBuilder->AppendString(kind, "Cdf");
+ }
- template <bool twoArgs>
- class THistogramMethodBase: public TBoxedValue {
- public:
- THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
- : HistogramIndexes_(histogramIndexes)
- , Pos_(pos)
- {
- }
-
- virtual TUnboxedValue GetResult(
- const THistogram& input,
- const TUnboxedValuePod* args) const = 0;
-
- TUnboxedValue Run(
- const IValueBuilder*,
- const TUnboxedValuePod* args) const override {
- try {
- auto bins = args[0].GetElement(HistogramIndexes_.Bins);
- double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>();
- double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>();
- auto binsIterator = bins.GetListIterator();
-
- THistogram histogram;
- histogram.SetType(HT_ADAPTIVE_HISTOGRAM);
- histogram.SetMinValue(min);
- histogram.SetMaxValue(max);
- for (TUnboxedValue current; binsIterator.Next(current);) {
- double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>();
- double position = current.GetElement(HistogramIndexes_.Position).template Get<double>();
- histogram.AddFreq(frequency);
- histogram.AddPosition(position);
- }
+ fields[HistogramIndexes_.Kind] = kind;
+ fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
+ fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue);
+ fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue);
+ fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum);
+ return result;
+ }
- return GetResult(histogram, args);
- } catch (const std::exception& e) {
- UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
+ static bool DeclareSignature(
+ const TStringRef& name,
+ TType* userType,
+ IFunctionTypeInfoBuilder& builder,
+ bool typesOnly) {
+ Y_UNUSED(userType);
+ if (Name() == name) {
+ THistogramIndexes histogramIndexes(builder);
+ auto optionalDouble = builder.Optional()->Item<double>().Build();
+ auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build();
+ builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType);
+ builder.OptionalArgs(1);
+ builder.OptionalArgs(2);
+ if (!typesOnly) {
+ builder.Implementation(new THistogramNormalize(histogramIndexes));
}
+ builder.IsStrict();
+ return true;
+ } else {
+ return false;
}
+ }
- static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) {
- THistogramIndexes histogramIndexes(builder);
+private:
+ const THistogramIndexes HistogramIndexes_;
+};
+
+template <bool twoArgs>
+class THistogramMethodBase: public TBoxedValue {
+public:
+ THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
+ : HistogramIndexes_(histogramIndexes)
+ , Pos_(pos)
+ {
+ }
- if (twoArgs) {
- builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>();
- } else {
- builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>();
+ virtual TUnboxedValue GetResult(
+ const THistogram& input,
+ const TUnboxedValuePod* args) const = 0;
+
+ TUnboxedValue Run(
+ const IValueBuilder*,
+ const TUnboxedValuePod* args) const override {
+ try {
+ auto bins = args[0].GetElement(HistogramIndexes_.Bins);
+ double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>();
+ double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>();
+ auto binsIterator = bins.GetListIterator();
+
+ THistogram histogram;
+ histogram.SetType(HT_ADAPTIVE_HISTOGRAM);
+ histogram.SetMinValue(min);
+ histogram.SetMaxValue(max);
+ for (TUnboxedValue current; binsIterator.Next(current);) {
+ double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>();
+ double position = current.GetElement(HistogramIndexes_.Position).template Get<double>();
+ histogram.AddFreq(frequency);
+ histogram.AddPosition(position);
}
- return histogramIndexes;
+
+ return GetResult(histogram, args);
+ } catch (const std::exception& e) {
+ UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str());
}
+ }
- protected:
- const THistogramIndexes HistogramIndexes_;
- TSourcePosition Pos_;
- };
+ static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) {
+ THistogramIndexes histogramIndexes(builder);
+
+ if (twoArgs) {
+ builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>();
+ } else {
+ builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>();
+ }
+ return histogramIndexes;
+ }
-#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \
- class T##name: public THistogramMethodBase<false> { \
- public: \
- T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \
- : THistogramMethodBase<false>(histogramIndexes, pos) { \
- } \
- static const TStringRef& Name() { \
- static auto name = TStringRef::Of(#name); \
- return name; \
- } \
- static bool DeclareSignature( \
- const TStringRef& name, \
- TType* userType, \
- IFunctionTypeInfoBuilder& builder, \
- bool typesOnly) { \
- Y_UNUSED(userType); \
- if (Name() == name) { \
- const auto& histogramIndexes = DeclareSignatureBase(builder); \
- if (!typesOnly) { \
- builder.Implementation(new T##name(histogramIndexes, \
- builder.GetSourcePosition())); \
- } \
- return true; \
- } else { \
- return false; \
- } \
- } \
- TUnboxedValue GetResult( \
- const THistogram& input, \
- const TUnboxedValuePod* args) const override { \
- TAdaptiveWardHistogram histo(input, input.FreqSize()); \
- double result = histo.name(args[1].Get<double>()); \
- return TUnboxedValuePod(result); \
- } \
+protected:
+ const THistogramIndexes HistogramIndexes_;
+ TSourcePosition Pos_;
+};
+
+#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \
+ class T##name: public THistogramMethodBase<false> { \
+ public: \
+ T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \
+ : THistogramMethodBase<false>(histogramIndexes, pos) { \
+ } \
+ static const TStringRef& Name() { \
+ static auto name = TStringRef::Of(#name); \
+ return name; \
+ } \
+ static bool DeclareSignature( \
+ const TStringRef& name, \
+ TType* userType, \
+ IFunctionTypeInfoBuilder& builder, \
+ bool typesOnly) { \
+ Y_UNUSED(userType); \
+ if (Name() == name) { \
+ const auto& histogramIndexes = DeclareSignatureBase(builder); \
+ if (!typesOnly) { \
+ builder.Implementation(new T##name(histogramIndexes, \
+ builder.GetSourcePosition())); \
+ } \
+ return true; \
+ } else { \
+ return false; \
+ } \
+ } \
+ TUnboxedValue GetResult( \
+ const THistogram& input, \
+ const TUnboxedValuePod* args) const override { \
+ TAdaptiveWardHistogram histo(input, input.FreqSize()); \
+ double result = histo.name(args[1].Get<double>()); \
+ return TUnboxedValuePod(result); \
+ } \
};
#define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \
@@ -979,7 +986,7 @@ namespace {
const auto& histogramIndexes = DeclareSignatureBase(builder); \
if (!typesOnly) { \
builder.Implementation(new T##name(histogramIndexes, \
- builder.GetSourcePosition())); \
+ builder.GetSourcePosition())); \
} \
return true; \
} else { \
@@ -1001,18 +1008,18 @@ namespace {
#define DECLARE_HISTOGRAM_UDFS(name) \
HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name)
- HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF)
- HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF)
-
- SIMPLE_MODULE(THistogramModule,
- HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS)
- HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
- HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
- DECLARE_HISTOGRAM_UDFS(Linear)
- DECLARE_HISTOGRAM_UDFS(Logarithmic)
- THistogramPrint,
- THistogramNormalize,
- THistogramToCumulativeDistributionFunction)
-}
+HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF)
+HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF)
+
+SIMPLE_MODULE(THistogramModule,
+ HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS)
+ HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
+ HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
+ DECLARE_HISTOGRAM_UDFS(Linear)
+ DECLARE_HISTOGRAM_UDFS(Logarithmic)
+ THistogramPrint,
+ THistogramNormalize,
+ THistogramToCumulativeDistributionFunction)
+} // namespace
REGISTER_MODULES(THistogramModule)