diff options
| author | vvvv <[email protected]> | 2025-10-06 13:26:25 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2025-10-06 14:06:25 +0300 |
| commit | eca8ce9cb1613d5c983185c4e43c20651a9638aa (patch) | |
| tree | 61ee5ae779948e61af9a7691d19eaa2c09869121 /yql/essentials/udfs/common/histogram/histogram_udf.cpp | |
| parent | 4adf7eecae16a9b228b28cc5f64c27ef69ad5ec2 (diff) | |
YQL-20086 udfs
init
commit_hash:f9684778bf1ea956965f2360b80b91edb7d4ffbe
Diffstat (limited to 'yql/essentials/udfs/common/histogram/histogram_udf.cpp')
| -rw-r--r-- | yql/essentials/udfs/common/histogram/histogram_udf.cpp | 1529 |
1 files changed, 768 insertions, 761 deletions
diff --git a/yql/essentials/udfs/common/histogram/histogram_udf.cpp b/yql/essentials/udfs/common/histogram/histogram_udf.cpp index 731b5956ed8..283e243396b 100644 --- a/yql/essentials/udfs/common/histogram/histogram_udf.cpp +++ b/yql/essentials/udfs/common/histogram/histogram_udf.cpp @@ -43,461 +43,220 @@ namespace { XX(Merge, arg) #define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." #name; - HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) - DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) - DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) - - class TLinearHistogram: public TAdaptiveWardHistogram { - public: - TLinearHistogram(double step, double begin, double end) - : TAdaptiveWardHistogram(1ULL << 24) - , Step_(step) - , Begin_(begin) - , End_(end) - { - } - - void Add(double value, double weight) override { - if (value < Begin_) { - value = Begin_; - } else if (value > End_) { - value = End_; - } else { - value = std::floor(value / Step_ + 0.5) * Step_; - } - TAdaptiveWardHistogram::Add(value, weight); - } - - void Add(const THistoRec&) override { - Y_ABORT("Not implemented"); - } - - protected: - double Step_; - double Begin_; - double End_; - }; - - class TLogarithmicHistogram: public TLinearHistogram { - public: - TLogarithmicHistogram(double step, double begin, double end) - : TLinearHistogram(step, begin, end) - { - } - - void Add(double value, double weight) override { - double base = std::log(value) / std::log(Step_); - double prev = std::pow(Step_, std::floor(base)); - double next = std::pow(Step_, std::ceil(base)); - if (std::abs(value - next) > std::abs(value - prev)) { - value = prev; - } else { - value = next; - } - - if (value < Begin_) { - value = Begin_; - } else if (value > End_) { - value = End_; - } - - if (!std::isnan(value)) { - TAdaptiveWardHistogram::Add(value, weight); - } - } - - void Add(const THistoRec&) override { - Y_ABORT("Not implemented"); - } - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_Create: public TBoxedValue { - public: - THistogram_Create(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Create"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); - histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) +DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) +DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) + +class TLinearHistogram: public TAdaptiveWardHistogram { +public: + TLinearHistogram(double step, double begin, double end) + : TAdaptiveWardHistogram(1ULL << 24) + , Step_(step) + , Begin_(begin) + , End_(end) + { + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + void Add(double value, double weight) override { + if (value < Begin_) { + value = Begin_; + } else if (value > End_) { + value = End_; + } else { + value = std::floor(value / Step_ + 0.5) * Step_; } + TAdaptiveWardHistogram::Add(value, weight); + } - private: - TSourcePosition Pos_; - }; + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } - template <typename THistogramType, const char* ResourceName> - class THistogram_AddValue: public TBoxedValue { - public: - THistogram_AddValue(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); - resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); - return TUnboxedValuePod(args[0]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +protected: + double Step_; + double Begin_; + double End_; +}; + +class TLogarithmicHistogram: public TLinearHistogram { +public: + TLogarithmicHistogram(double step, double begin, double end) + : TLinearHistogram(step, begin, end) + { + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + void Add(double value, double weight) override { + double base = std::log(value) / std::log(Step_); + double prev = std::pow(Step_, std::floor(base)); + double next = std::pow(Step_, std::ceil(base)); + if (std::abs(value - next) > std::abs(value - prev)) { + value = prev; + } else { + value = next; } - private: - TSourcePosition Pos_; - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_Serialize: public TBoxedValue { - public: - THistogram_Serialize(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - THistogram proto; - TString result; - static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); - Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); - return valueBuilder->NewString(result); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + if (value < Begin_) { + value = Begin_; + } else if (value > End_) { + value = End_; } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<char*(TResource<ResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + if (!std::isnan(value)) { + TAdaptiveWardHistogram::Add(value, weight); } + } - private: - TSourcePosition Pos_; - }; + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Create: public TBoxedValue { +public: + THistogram_Create(TSourcePosition pos) + : Pos_(pos) + { + } - template <typename THistogramType, const char* ResourceName> - class THistogram_Deserialize: public TBoxedValue { - public: - THistogram_Deserialize(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogram proto; - Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>())); - histogram->Get()->FromProto(proto); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - TSourcePosition Pos_; - }; + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Create"; + static auto nameRef = TStringRef(name); + return nameRef; + } - template <typename THistogramType, const char* ResourceName> - class THistogram_Merge: public TBoxedValue { - public: - THistogram_Merge(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogram proto; - static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); - static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); - return TUnboxedValuePod(args[1]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); + histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; - - struct THistogramIndexes { - static constexpr ui32 BinFieldsCount = 2U; - static constexpr ui32 ResultFieldsCount = 5U; - - THistogramIndexes(IFunctionTypeInfoBuilder& builder) { - const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); - const auto binsList = builder.List()->Item(binStructType).Build(); - ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build(); - } - - ui32 Kind; - ui32 Min; - ui32 Max; - ui32 WeightsSum; - ui32 Bins; - - ui32 Position; - ui32 Frequency; +private: + TSourcePosition Pos_; +}; - TType* ResultStructType; - }; +template <typename THistogramType, const char* ResourceName> +class THistogram_AddValue: public TBoxedValue { +public: + THistogram_AddValue(TSourcePosition pos) + : Pos_(pos) + { + } - template <typename THistogramType, const char* ResourceName> - class THistogram_GetResult: public TBoxedValue { - public: - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) - : HistogramIndexes_(histogramIndexes) - , Pos_(pos) - { - } + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; + static auto nameRef = TStringRef(name); + return nameRef; + } - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult"; - static auto nameRef = TStringRef(name); - return nameRef; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); + resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - THistogram proto; - auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); - histogram->ToProto(proto); - - auto size = proto.FreqSize(); - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); - if (size) { - TUnboxedValue* items = nullptr; - fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); - for (ui64 i = 0; i < size; ++i) { - TUnboxedValue* binFields = nullptr; - *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); - binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); - } - } else { - fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList(); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0); +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); } - - return result; + return true; + } else { + return false; } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); +private: + TSourcePosition Pos_; +}; - THistogramIndexes histogramIndexes(builder); +template <typename THistogramType, const char* ResourceName> +class THistogram_Serialize: public TBoxedValue { +public: + THistogram_Serialize(TSourcePosition pos) + : Pos_(pos) + { + } - builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - if (!typesOnly) { - builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - const THistogramIndexes HistogramIndexes_; - TSourcePosition Pos_; - }; + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } - template <> - TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { - Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource( - args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->Add(args[0].Get<double>(), 1.0); - return TUnboxedValuePod(histogram.Release()); + THistogram proto; + TString result; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); + return valueBuilder->NewString(result); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } } - template <> - bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + builder.SimpleSignature<char*(TResource<ResourceName>)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -505,17 +264,35 @@ namespace { } } - template <> - TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Deserialize: public TBoxedValue { +public: + THistogram_Deserialize(TSourcePosition pos) + : Pos_(pos) + { + } + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { Y_UNUSED(valueBuilder); THistogram proto; Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram( - new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>())); histogram->Get()->FromProto(proto); return TUnboxedValuePod(histogram.Release()); } catch (const std::exception& e) { @@ -523,17 +300,17 @@ namespace { } } - template <> - bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -541,33 +318,52 @@ namespace { } } - template <> - TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Merge: public TBoxedValue { +public: + THistogram_Merge(TSourcePosition pos) + : Pos_(pos) + { + } + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource( - args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->Add(args[0].Get<double>(), 1.0); - return TUnboxedValuePod(histogram.Release()); + THistogram proto; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); + return TUnboxedValuePod(args[1]); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } } - template <> - bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -575,35 +371,99 @@ namespace { } } - template <> - TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +struct THistogramIndexes { + static constexpr ui32 BinFieldsCount = 2U; + static constexpr ui32 ResultFieldsCount = 5U; + + THistogramIndexes(IFunctionTypeInfoBuilder& builder) { + const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); + const auto binsList = builder.List()->Item(binStructType).Build(); + ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build(); + } + + ui32 Kind; + ui32 Min; + ui32 Max; + ui32 WeightsSum; + ui32 Bins; + + ui32 Position; + ui32 Frequency; + + TType* ResultStructType; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_GetResult: public TBoxedValue { +public: + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes_(histogramIndexes) + , Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; - try { - Y_UNUSED(valueBuilder); - THistogram proto; - Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram( - new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->FromProto(proto); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + const TUnboxedValuePod* args) const override { + THistogram proto; + auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); + histogram->ToProto(proto); + + auto size = proto.FreqSize(); + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); + if (size) { + TUnboxedValue* items = nullptr; + fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); + for (ui64 i = 0; i < size; ++i) { + TUnboxedValue* binFields = nullptr; + *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); + binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); + } + } else { + fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList(); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0); } + + return result; } - template <> - bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); + + THistogramIndexes histogramIndexes(builder); + + builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); + if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); } return true; } else { @@ -611,352 +471,499 @@ namespace { } } - class THistogramPrint: public TBoxedValue { - public: - THistogramPrint(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { +private: + const THistogramIndexes HistogramIndexes_; + TSourcePosition Pos_; +}; + +template <> +TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } +} + +template <> +TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; + } +} - static const TStringRef& Name() { - static auto name = TStringRef::Of("Print"); - return name; +template <> +TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; + } +} - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - auto kind = args[0].GetElement(HistogramIndexes_.Kind); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>(); - auto binsIterator = bins.GetListIterator(); +template <> +TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - TStringBuilder result; - result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; - result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", - bins.GetListLength(), weightsSum, min, max); - double maxFrequency = 0.0; - size_t maxPositionLength = 0; - size_t maxFrequencyLength = 0; - const ui8 bars = args[1].GetOrDefault<ui8>(25); +template <> +bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } +} - for (TUnboxedValue current; binsIterator.Next(current);) { - if (bars) { - double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - if (frequency > maxFrequency) { - maxFrequency = frequency; - } - } - size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length(); - size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length(); +class THistogramPrint: public TBoxedValue { +public: + THistogramPrint(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } - if (positionLength > maxPositionLength) { - maxPositionLength = positionLength; - } - if (frequencyLength > maxFrequencyLength) { - maxFrequencyLength = frequencyLength; - } - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Print"); + return name; + } - binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - double position = current.GetElement(HistogramIndexes_.Position).Get<double>(); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto kind = args[0].GetElement(HistogramIndexes_.Kind); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>(); + auto binsIterator = bins.GetListIterator(); + + TStringBuilder result; + result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; + result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", + bins.GetListLength(), weightsSum, min, max); + double maxFrequency = 0.0; + size_t maxPositionLength = 0; + size_t maxFrequencyLength = 0; + const ui8 bars = args[1].GetOrDefault<ui8>(25); + + for (TUnboxedValue current; binsIterator.Next(current);) { + if (bars) { double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - result << "\n"; - if (bars && maxFrequency > 0) { - ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); - for (ui8 i = 0; i < bars; ++i) { - if (i < filledBars) { - result << "█"; - } else { - result << "░"; - } - } + if (frequency > maxFrequency) { + maxFrequency = frequency; } - result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength); - result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); } + size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length(); + size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length(); - return valueBuilder->NewString(result); + if (positionLength > maxPositionLength) { + maxPositionLength = positionLength; + } + if (frequencyLength > maxFrequencyLength) { + maxFrequencyLength = frequencyLength; + } } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); - auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); - - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); - - if (!typesOnly) { - builder.Implementation(new THistogramPrint(histogramIndexes)); + binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + double position = current.GetElement(HistogramIndexes_.Position).Get<double>(); + double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + result << "\n"; + if (bars && maxFrequency > 0) { + ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); + for (ui8 i = 0; i < bars; ++i) { + if (i < filledBars) { + result << "█"; + } else { + result << "░"; + } } - builder.IsStrict(); - return true; - } else { - return false; } + result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength); + result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); } - private: - const THistogramIndexes HistogramIndexes_; - }; + return valueBuilder->NewString(result); + } - class THistogramToCumulativeDistributionFunction: public TBoxedValue { - public: - THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); - static const TStringRef& Name() { - static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); - return name; - } + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double sum = 0.0; - double weightsSum = 0.0; - std::vector<TUnboxedValue> resultBins; - if (bins.HasFastListLength()) - resultBins.reserve(bins.GetListLength()); - const auto binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - TUnboxedValue* binFields = nullptr; - auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - sum += frequency; - weightsSum += sum; - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum); - binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); - resultBins.emplace_back(std::move(resultCurrent)); + if (!typesOnly) { + builder.Implementation(new THistogramPrint(histogramIndexes)); } - - auto kind = args[0].GetElement(HistogramIndexes_.Kind); - fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf"); - fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); - return result; + builder.IsStrict(); + return true; + } else { + return false; } + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); +private: + const THistogramIndexes HistogramIndexes_; +}; - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); +class THistogramToCumulativeDistributionFunction: public TBoxedValue { +public: + THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } - if (!typesOnly) { - builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); - } - builder.IsStrict(); - return true; - } else { - return false; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double sum = 0.0; + double weightsSum = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) { + resultBins.reserve(bins.GetListLength()); + } + const auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + sum += frequency; + weightsSum += sum; + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum); + binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); + resultBins.emplace_back(std::move(resultCurrent)); } - private: - const THistogramIndexes HistogramIndexes_; - }; + auto kind = args[0].GetElement(HistogramIndexes_.Kind); + fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf"); + fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } - class THistogramNormalize: public TBoxedValue { - public: - THistogramNormalize(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); - static const TStringRef& Name() { - static auto name = TStringRef::Of("Normalize"); - return name; - } + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double area = args[1].GetOrDefault<double>(100.0); - bool cdfNormalization = args[2].GetOrDefault<bool>(false); - double sum = 0.0; - double weightsSum = 0.0; - double lastBinFrequency = 0.0; - std::vector<TUnboxedValue> resultBins; - if (bins.HasFastListLength()) - resultBins.reserve(bins.GetListLength()); - auto binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - } - binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - TUnboxedValue* binFields = nullptr; - auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - if (cdfNormalization) { - frequency = area * frequency / lastBinFrequency; - } else { - frequency = area * frequency / sum; - } - weightsSum += frequency; - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency); - binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); - resultBins.emplace_back(std::move(resultCurrent)); + if (!typesOnly) { + builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); } + builder.IsStrict(); + return true; + } else { + return false; + } + } - TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind); - if (cdfNormalization) { - kind = valueBuilder->AppendString(kind, "Cdf"); - } +private: + const THistogramIndexes HistogramIndexes_; +}; - fields[HistogramIndexes_.Kind] = kind; - fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); - return result; - } - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); - auto optionalDouble = builder.Optional()->Item<double>().Build(); - auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); - builder.OptionalArgs(1); - builder.OptionalArgs(2); - if (!typesOnly) { - builder.Implementation(new THistogramNormalize(histogramIndexes)); - } - builder.IsStrict(); - return true; +class THistogramNormalize: public TBoxedValue { +public: + THistogramNormalize(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Normalize"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double area = args[1].GetOrDefault<double>(100.0); + bool cdfNormalization = args[2].GetOrDefault<bool>(false); + double sum = 0.0; + double weightsSum = 0.0; + double lastBinFrequency = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) { + resultBins.reserve(bins.GetListLength()); + } + auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + } + binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + if (cdfNormalization) { + frequency = area * frequency / lastBinFrequency; } else { - return false; + frequency = area * frequency / sum; } + weightsSum += frequency; + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency); + binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); + resultBins.emplace_back(std::move(resultCurrent)); } - private: - const THistogramIndexes HistogramIndexes_; - }; + TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind); + if (cdfNormalization) { + kind = valueBuilder->AppendString(kind, "Cdf"); + } - template <bool twoArgs> - class THistogramMethodBase: public TBoxedValue { - public: - THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) - : HistogramIndexes_(histogramIndexes) - , Pos_(pos) - { - } - - virtual TUnboxedValue GetResult( - const THistogram& input, - const TUnboxedValuePod* args) const = 0; - - TUnboxedValue Run( - const IValueBuilder*, - const TUnboxedValuePod* args) const override { - try { - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>(); - double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>(); - auto binsIterator = bins.GetListIterator(); - - THistogram histogram; - histogram.SetType(HT_ADAPTIVE_HISTOGRAM); - histogram.SetMinValue(min); - histogram.SetMaxValue(max); - for (TUnboxedValue current; binsIterator.Next(current);) { - double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>(); - double position = current.GetElement(HistogramIndexes_.Position).template Get<double>(); - histogram.AddFreq(frequency); - histogram.AddPosition(position); - } + fields[HistogramIndexes_.Kind] = kind; + fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } - return GetResult(histogram, args); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalDouble = builder.Optional()->Item<double>().Build(); + auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); + builder.OptionalArgs(1); + builder.OptionalArgs(2); + if (!typesOnly) { + builder.Implementation(new THistogramNormalize(histogramIndexes)); } + builder.IsStrict(); + return true; + } else { + return false; } + } - static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { - THistogramIndexes histogramIndexes(builder); +private: + const THistogramIndexes HistogramIndexes_; +}; + +template <bool twoArgs> +class THistogramMethodBase: public TBoxedValue { +public: + THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes_(histogramIndexes) + , Pos_(pos) + { + } - if (twoArgs) { - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); - } else { - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); + virtual TUnboxedValue GetResult( + const THistogram& input, + const TUnboxedValuePod* args) const = 0; + + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>(); + double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>(); + auto binsIterator = bins.GetListIterator(); + + THistogram histogram; + histogram.SetType(HT_ADAPTIVE_HISTOGRAM); + histogram.SetMinValue(min); + histogram.SetMaxValue(max); + for (TUnboxedValue current; binsIterator.Next(current);) { + double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>(); + double position = current.GetElement(HistogramIndexes_.Position).template Get<double>(); + histogram.AddFreq(frequency); + histogram.AddPosition(position); } - return histogramIndexes; + + return GetResult(histogram, args); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - protected: - const THistogramIndexes HistogramIndexes_; - TSourcePosition Pos_; - }; + static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { + THistogramIndexes histogramIndexes(builder); + + if (twoArgs) { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); + } else { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); + } + return histogramIndexes; + } -#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ - class T##name: public THistogramMethodBase<false> { \ - public: \ - T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ - : THistogramMethodBase<false>(histogramIndexes, pos) { \ - } \ - static const TStringRef& Name() { \ - static auto name = TStringRef::Of(#name); \ - return name; \ - } \ - static bool DeclareSignature( \ - const TStringRef& name, \ - TType* userType, \ - IFunctionTypeInfoBuilder& builder, \ - bool typesOnly) { \ - Y_UNUSED(userType); \ - if (Name() == name) { \ - const auto& histogramIndexes = DeclareSignatureBase(builder); \ - if (!typesOnly) { \ - builder.Implementation(new T##name(histogramIndexes, \ - builder.GetSourcePosition())); \ - } \ - return true; \ - } else { \ - return false; \ - } \ - } \ - TUnboxedValue GetResult( \ - const THistogram& input, \ - const TUnboxedValuePod* args) const override { \ - TAdaptiveWardHistogram histo(input, input.FreqSize()); \ - double result = histo.name(args[1].Get<double>()); \ - return TUnboxedValuePod(result); \ - } \ +protected: + const THistogramIndexes HistogramIndexes_; + TSourcePosition Pos_; +}; + +#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ + class T##name: public THistogramMethodBase<false> { \ + public: \ + T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ + : THistogramMethodBase<false>(histogramIndexes, pos) { \ + } \ + static const TStringRef& Name() { \ + static auto name = TStringRef::Of(#name); \ + return name; \ + } \ + static bool DeclareSignature( \ + const TStringRef& name, \ + TType* userType, \ + IFunctionTypeInfoBuilder& builder, \ + bool typesOnly) { \ + Y_UNUSED(userType); \ + if (Name() == name) { \ + const auto& histogramIndexes = DeclareSignatureBase(builder); \ + if (!typesOnly) { \ + builder.Implementation(new T##name(histogramIndexes, \ + builder.GetSourcePosition())); \ + } \ + return true; \ + } else { \ + return false; \ + } \ + } \ + TUnboxedValue GetResult( \ + const THistogram& input, \ + const TUnboxedValuePod* args) const override { \ + TAdaptiveWardHistogram histo(input, input.FreqSize()); \ + double result = histo.name(args[1].Get<double>()); \ + return TUnboxedValuePod(result); \ + } \ }; #define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \ @@ -979,7 +986,7 @@ namespace { const auto& histogramIndexes = DeclareSignatureBase(builder); \ if (!typesOnly) { \ builder.Implementation(new T##name(histogramIndexes, \ - builder.GetSourcePosition())); \ + builder.GetSourcePosition())); \ } \ return true; \ } else { \ @@ -1001,18 +1008,18 @@ namespace { #define DECLARE_HISTOGRAM_UDFS(name) \ HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name) - HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) - HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) - - SIMPLE_MODULE(THistogramModule, - HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) - HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) - HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) - DECLARE_HISTOGRAM_UDFS(Linear) - DECLARE_HISTOGRAM_UDFS(Logarithmic) - THistogramPrint, - THistogramNormalize, - THistogramToCumulativeDistributionFunction) -} +HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) +HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) + +SIMPLE_MODULE(THistogramModule, + HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) + HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + DECLARE_HISTOGRAM_UDFS(Linear) + DECLARE_HISTOGRAM_UDFS(Logarithmic) + THistogramPrint, + THistogramNormalize, + THistogramToCumulativeDistributionFunction) +} // namespace REGISTER_MODULES(THistogramModule) |
