Restoring authorship annotation for <alex-sh@yandex-team.ru>. Commit 1 of 2.

author: alex-sh <alex-sh@yandex-team.ru> 2022-02-10 16:50:03 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:50:03 +0300
commit: 3196904c9f5bf7aff7374eeadcb0671589581f61 (patch)
tree: d13114a178799aeb203a4b3b43dd7fb0c4f6975f /library/cpp/linear_regression/linear_regression.h
parent: d154d11651ea533127249184148c3f023e2c6d0a (diff)
download: ydb-3196904c9f5bf7aff7374eeadcb0671589581f61.tar.gz
1 files changed, 288 insertions, 288 deletions
diff --git a/library/cpp/linear_regression/linear_regression.h b/library/cpp/linear_regression/linear_regression.h
index e57de5ff6c..44b0254d5d 100644
--- a/library/cpp/linear_regression/linear_regression.h
+++ b/library/cpp/linear_regression/linear_regression.h
@@ -1,342 +1,342 @@
-#pragma once
-
-#include "linear_model.h"
-#include "welford.h"
-
+#pragma once 
+ 
+#include "linear_model.h" 
+#include "welford.h" 
+ 
 #include <library/cpp/accurate_accumulate/accurate_accumulate.h>
-
-#include <util/generic/vector.h>
-#include <util/generic/hash.h>
+ 
+#include <util/generic/vector.h> 
+#include <util/generic/hash.h> 
 #include <util/generic/ymath.h>
-
-class TFastLinearRegressionSolver {
-private:
-    TKahanAccumulator<double> SumSquaredGoals;
-
+ 
+class TFastLinearRegressionSolver { 
+private: 
+    TKahanAccumulator<double> SumSquaredGoals; 
+ 
     TVector<double> LinearizedOLSMatrix;
     TVector<double> OLSVector;
 
-public:
+public: 
     bool Add(const TVector<double>& features, const double goal, const double weight = 1.);
-    TLinearModel Solve() const;
-    double SumSquaredErrors() const;
-};
-
-class TLinearRegressionSolver {
-private:
-    double GoalsMean = 0.;
-    double GoalsDeviation = 0.;
-
+    TLinearModel Solve() const; 
+    double SumSquaredErrors() const; 
+}; 
+ 
+class TLinearRegressionSolver { 
+private: 
+    double GoalsMean = 0.; 
+    double GoalsDeviation = 0.; 
+ 
     TVector<double> FeatureMeans;
     TVector<double> LastMeans;
     TVector<double> NewMeans;
     TVector<double> LinearizedOLSMatrix;
-
+ 
     TVector<double> OLSVector;
+ 
+    TKahanAccumulator<double> SumWeights; 
 
-    TKahanAccumulator<double> SumWeights;
-
-public:
+public: 
     bool Add(const TVector<double>& features, const double goal, const double weight = 1.);
-    TLinearModel Solve() const;
-    double SumSquaredErrors() const;
-};
-
-template <typename TStoreType>
-class TTypedFastSLRSolver {
-private:
-    TStoreType SumFeatures = TStoreType();
-    TStoreType SumSquaredFeatures = TStoreType();
-
-    TStoreType SumGoals = TStoreType();
-    TStoreType SumSquaredGoals = TStoreType();
-
-    TStoreType SumProducts = TStoreType();
-
-    TStoreType SumWeights = TStoreType();
-
-public:
+    TLinearModel Solve() const; 
+    double SumSquaredErrors() const; 
+}; 
+ 
+template <typename TStoreType> 
+class TTypedFastSLRSolver { 
+private: 
+    TStoreType SumFeatures = TStoreType(); 
+    TStoreType SumSquaredFeatures = TStoreType(); 
+ 
+    TStoreType SumGoals = TStoreType(); 
+    TStoreType SumSquaredGoals = TStoreType(); 
+ 
+    TStoreType SumProducts = TStoreType(); 
+ 
+    TStoreType SumWeights = TStoreType(); 
+
+public: 
     bool Add(const double feature, const double goal, const double weight = 1.) {
-        SumFeatures += feature * weight;
-        SumSquaredFeatures += feature * feature * weight;
-
-        SumGoals += goal * weight;
-        SumSquaredGoals += goal * goal * weight;
-
-        SumProducts += goal * feature * weight;
-
-        SumWeights += weight;
+        SumFeatures += feature * weight; 
+        SumSquaredFeatures += feature * feature * weight; 
+ 
+        SumGoals += goal * weight; 
+        SumSquaredGoals += goal * goal * weight; 
+ 
+        SumProducts += goal * feature * weight; 
+ 
+        SumWeights += weight; 
 
         return true;
-    }
-
-    template <typename TFloatType>
-    void Solve(TFloatType& factor, TFloatType& intercept, const double regularizationParameter = 0.1) const {
+    } 
+ 
+    template <typename TFloatType> 
+    void Solve(TFloatType& factor, TFloatType& intercept, const double regularizationParameter = 0.1) const { 
         if (!(double)SumGoals) {
-            factor = intercept = TFloatType();
-            return;
-        }
-
-        double productsDeviation, featuresDeviation;
-        SetupSolutionFactors(productsDeviation, featuresDeviation);
-
-        if (!featuresDeviation) {
-            factor = TFloatType();
+            factor = intercept = TFloatType(); 
+            return; 
+        } 
+ 
+        double productsDeviation, featuresDeviation; 
+        SetupSolutionFactors(productsDeviation, featuresDeviation); 
+ 
+        if (!featuresDeviation) { 
+            factor = TFloatType(); 
             intercept = (double)SumGoals / (double)SumWeights;
-            return;
-        }
-
-        factor = productsDeviation / (featuresDeviation + regularizationParameter);
+            return; 
+        } 
+ 
+        factor = productsDeviation / (featuresDeviation + regularizationParameter); 
         intercept = (double)SumGoals / (double)SumWeights - factor * (double)SumFeatures / (double)SumWeights;
-    }
-
-    double SumSquaredErrors(const double regularizationParameter = 0.1) const {
+    } 
+ 
+    double SumSquaredErrors(const double regularizationParameter = 0.1) const { 
         if (!(double)SumWeights) {
-            return 0.;
-        }
-
+            return 0.; 
+        } 
+ 
         const double sumGoalSquaredDeviations = (double)SumSquaredGoals - (double)SumGoals / (double)SumWeights * (double)SumGoals;
-
-        double productsDeviation, featuresDeviation;
-        SetupSolutionFactors(productsDeviation, featuresDeviation);
-        if (!featuresDeviation) {
-            return sumGoalSquaredDeviations;
-        }
-
-        const double factor = productsDeviation / (featuresDeviation + regularizationParameter);
-
-        const double sumSquaredErrors = factor * factor * featuresDeviation - 2 * factor * productsDeviation + sumGoalSquaredDeviations;
-        return Max(0., sumSquaredErrors);
-    }
-
-private:
-    void SetupSolutionFactors(double& productsDeviation, double& featuresDeviation) const {
+ 
+        double productsDeviation, featuresDeviation; 
+        SetupSolutionFactors(productsDeviation, featuresDeviation); 
+        if (!featuresDeviation) { 
+            return sumGoalSquaredDeviations; 
+        } 
+ 
+        const double factor = productsDeviation / (featuresDeviation + regularizationParameter); 
+ 
+        const double sumSquaredErrors = factor * factor * featuresDeviation - 2 * factor * productsDeviation + sumGoalSquaredDeviations; 
+        return Max(0., sumSquaredErrors); 
+    } 
+
+private: 
+    void SetupSolutionFactors(double& productsDeviation, double& featuresDeviation) const { 
         if (!(double)SumWeights) {
-            productsDeviation = featuresDeviation = 0.;
-            return;
-        }
-
+            productsDeviation = featuresDeviation = 0.; 
+            return; 
+        } 
+ 
         featuresDeviation = (double)SumSquaredFeatures - (double)SumFeatures / (double)SumWeights * (double)SumFeatures;
-        if (!featuresDeviation) {
-            return;
-        }
+        if (!featuresDeviation) { 
+            return; 
+        } 
         productsDeviation = (double)SumProducts - (double)SumFeatures / (double)SumWeights * (double)SumGoals;
-    }
-};
-
-using TFastSLRSolver = TTypedFastSLRSolver<double>;
+    } 
+}; 
+ 
+using TFastSLRSolver = TTypedFastSLRSolver<double>; 
 using TKahanSLRSolver = TTypedFastSLRSolver<TKahanAccumulator<double>>;
-
-class TSLRSolver {
-private:
-    double FeaturesMean = 0.;
-    double FeaturesDeviation = 0.;
-
-    double GoalsMean = 0.;
-    double GoalsDeviation = 0.;
-
-    TKahanAccumulator<double> SumWeights;
-
-    double Covariation = 0.;
-
-public:
+ 
+class TSLRSolver { 
+private: 
+    double FeaturesMean = 0.; 
+    double FeaturesDeviation = 0.; 
+ 
+    double GoalsMean = 0.; 
+    double GoalsDeviation = 0.; 
+ 
+    TKahanAccumulator<double> SumWeights; 
+ 
+    double Covariation = 0.; 
+
+public: 
     bool Add(const double feature, const double goal, const double weight = 1.);
-
-    bool Add(const double* featuresBegin, const double* featuresEnd, const double* goalsBegin);
-    bool Add(const double* featuresBegin, const double* featuresEnd, const double* goalsBegin, const double* weightsBegin);
-
+ 
+    bool Add(const double* featuresBegin, const double* featuresEnd, const double* goalsBegin); 
+    bool Add(const double* featuresBegin, const double* featuresEnd, const double* goalsBegin, const double* weightsBegin); 
+ 
     bool Add(const TVector<double>& features, const TVector<double>& goals) {
         Y_ASSERT(features.size() == goals.size());
         return Add(features.data(), features.data() + features.size(), goals.data());
-    }
-
+    } 
+ 
     bool Add(const TVector<double>& features, const TVector<double>& goals, const TVector<double>& weights) {
         Y_ASSERT(features.size() == goals.size() && features.size() == weights.size());
         return Add(features.data(), features.data() + features.size(), goals.data(), weights.data());
-    }
-
-    template <typename TFloatType>
-    void Solve(TFloatType& factor, TFloatType& intercept, const double regularizationParameter = 0.1) const {
-        if (!FeaturesDeviation) {
-            factor = 0.;
-            intercept = GoalsMean;
-            return;
-        }
-
-        factor = Covariation / (FeaturesDeviation + regularizationParameter);
-        intercept = GoalsMean - factor * FeaturesMean;
-    }
-
-    double SumSquaredErrors(const double regularizationParameter = 0.1) const;
-
-    double GetSumWeights() const {
-        return SumWeights.Get();
-    }
-};
-
-template <typename TSLRSolverType>
-class TTypedBestSLRSolver {
-private:
+    } 
+ 
+    template <typename TFloatType> 
+    void Solve(TFloatType& factor, TFloatType& intercept, const double regularizationParameter = 0.1) const { 
+        if (!FeaturesDeviation) { 
+            factor = 0.; 
+            intercept = GoalsMean; 
+            return; 
+        } 
+ 
+        factor = Covariation / (FeaturesDeviation + regularizationParameter); 
+        intercept = GoalsMean - factor * FeaturesMean; 
+    } 
+ 
+    double SumSquaredErrors(const double regularizationParameter = 0.1) const; 
+ 
+    double GetSumWeights() const { 
+        return SumWeights.Get(); 
+    } 
+}; 
+ 
+template <typename TSLRSolverType> 
+class TTypedBestSLRSolver { 
+private: 
     TVector<TSLRSolverType> SLRSolvers;
 
-public:
+public: 
     bool Add(const TVector<double>& features, const double goal, const double weight = 1.) {
-        if (SLRSolvers.empty()) {
-            SLRSolvers.resize(features.size());
-        }
-
-        for (size_t featureNumber = 0; featureNumber < features.size(); ++featureNumber) {
-            SLRSolvers[featureNumber].Add(features[featureNumber], goal, weight);
-        }
+        if (SLRSolvers.empty()) { 
+            SLRSolvers.resize(features.size()); 
+        } 
+ 
+        for (size_t featureNumber = 0; featureNumber < features.size(); ++featureNumber) { 
+            SLRSolvers[featureNumber].Add(features[featureNumber], goal, weight); 
+        } 
 
         return true;
-    }
-
-    TLinearModel Solve(const double regularizationParameter = 0.1) const {
-        const TSLRSolverType* bestSolver = nullptr;
-        for (const TSLRSolverType& solver : SLRSolvers) {
-            if (!bestSolver || solver.SumSquaredErrors(regularizationParameter) < bestSolver->SumSquaredErrors(regularizationParameter)) {
-                bestSolver = &solver;
-            }
-        }
-
+    } 
+ 
+    TLinearModel Solve(const double regularizationParameter = 0.1) const { 
+        const TSLRSolverType* bestSolver = nullptr; 
+        for (const TSLRSolverType& solver : SLRSolvers) { 
+            if (!bestSolver || solver.SumSquaredErrors(regularizationParameter) < bestSolver->SumSquaredErrors(regularizationParameter)) { 
+                bestSolver = &solver; 
+            } 
+        } 
+ 
         TVector<double> coefficients(SLRSolvers.size());
         double intercept = 0.0;
-        if (bestSolver) {
+        if (bestSolver) { 
             bestSolver->Solve(coefficients[bestSolver - SLRSolvers.begin()], intercept, regularizationParameter);
-        }
-
+        } 
+ 
         TLinearModel model(std::move(coefficients), intercept);
-        return model;
-    }
-
-    double SumSquaredErrors(const double regularizationParameter = 0.1) const {
-        if (SLRSolvers.empty()) {
-            return 0.;
-        }
-
-        double sse = SLRSolvers.begin()->SumSquaredErrors(regularizationParameter);
-        for (const TSLRSolver& solver : SLRSolvers) {
-            sse = Min(solver.SumSquaredErrors(regularizationParameter), sse);
-        }
-        return sse;
-    }
-};
-
-using TFastBestSLRSolver = TTypedBestSLRSolver<TFastSLRSolver>;
-using TKahanBestSLRSolver = TTypedBestSLRSolver<TKahanSLRSolver>;
-using TBestSLRSolver = TTypedBestSLRSolver<TSLRSolver>;
-
-enum ETransformationType {
-    TT_IDENTITY,
-    TT_SIGMA,
-};
-
-struct TTransformationParameters {
-    double RegressionFactor = 1.;
-    double RegressionIntercept = 0.;
-
-    double FeatureOffset = 0.;
-    double FeatureNormalizer = 1.;
-
+        return model; 
+    } 
+ 
+    double SumSquaredErrors(const double regularizationParameter = 0.1) const { 
+        if (SLRSolvers.empty()) { 
+            return 0.; 
+        } 
+ 
+        double sse = SLRSolvers.begin()->SumSquaredErrors(regularizationParameter); 
+        for (const TSLRSolver& solver : SLRSolvers) { 
+            sse = Min(solver.SumSquaredErrors(regularizationParameter), sse); 
+        } 
+        return sse; 
+    } 
+}; 
+ 
+using TFastBestSLRSolver = TTypedBestSLRSolver<TFastSLRSolver>; 
+using TKahanBestSLRSolver = TTypedBestSLRSolver<TKahanSLRSolver>; 
+using TBestSLRSolver = TTypedBestSLRSolver<TSLRSolver>; 
+ 
+enum ETransformationType { 
+    TT_IDENTITY, 
+    TT_SIGMA, 
+}; 
+ 
+struct TTransformationParameters { 
+    double RegressionFactor = 1.; 
+    double RegressionIntercept = 0.; 
+ 
+    double FeatureOffset = 0.; 
+    double FeatureNormalizer = 1.; 
+ 
     Y_SAVELOAD_DEFINE(RegressionFactor,
                       RegressionIntercept,
                       FeatureOffset,
                       FeatureNormalizer);
-};
-
-class TFeaturesTransformer {
-private:
-    ETransformationType TransformationType;
-    TTransformationParameters TransformationParameters;
-
-public:
+}; 
+ 
+class TFeaturesTransformer { 
+private: 
+    ETransformationType TransformationType; 
+    TTransformationParameters TransformationParameters; 
+
+public: 
     Y_SAVELOAD_DEFINE(TransformationType, TransformationParameters);
-
+ 
     TFeaturesTransformer() = default;
-
-    TFeaturesTransformer(const ETransformationType transformationType,
-                         const TTransformationParameters transformationParameters)
-        : TransformationType(transformationType)
-        , TransformationParameters(transformationParameters)
-    {
-    }
-
-    double Transformation(const double value) const {
-        switch (TransformationType) {
+ 
+    TFeaturesTransformer(const ETransformationType transformationType, 
+                         const TTransformationParameters transformationParameters) 
+        : TransformationType(transformationType) 
+        , TransformationParameters(transformationParameters) 
+    { 
+    } 
+ 
+    double Transformation(const double value) const { 
+        switch (TransformationType) { 
             case ETransformationType::TT_IDENTITY: {
-                return value;
-            }
+                return value; 
+            } 
             case ETransformationType::TT_SIGMA: {
-                const double valueWithoutOffset = value - TransformationParameters.FeatureOffset;
-                const double transformedValue = valueWithoutOffset / (fabs(valueWithoutOffset) + TransformationParameters.FeatureNormalizer);
-                return TransformationParameters.RegressionIntercept + TransformationParameters.RegressionFactor * transformedValue;
-            }
-        }
+                const double valueWithoutOffset = value - TransformationParameters.FeatureOffset; 
+                const double transformedValue = valueWithoutOffset / (fabs(valueWithoutOffset) + TransformationParameters.FeatureNormalizer); 
+                return TransformationParameters.RegressionIntercept + TransformationParameters.RegressionFactor * transformedValue; 
+            } 
+        } 
         Y_ASSERT(0);
-        return 0.;
-    }
-};
-
-class TFeaturesTransformerLearner {
-private:
-    struct TPoint {
-        float Argument;
-        float Target;
-    };
-
-    float MinimalArgument = Max<float>();
-    float MaximalArgument = Min<float>();
-
-    ETransformationType TransformationType;
+        return 0.; 
+    } 
+}; 
+ 
+class TFeaturesTransformerLearner { 
+private: 
+    struct TPoint { 
+        float Argument; 
+        float Target; 
+    }; 
+ 
+    float MinimalArgument = Max<float>(); 
+    float MaximalArgument = Min<float>(); 
+ 
+    ETransformationType TransformationType; 
     TVector<TPoint> Points;
 
-public:
-    TFeaturesTransformerLearner(const ETransformationType transformationType)
-        : TransformationType(transformationType)
-    {
-    }
-
-    void Add(const float argument, const float target) {
-        Points.push_back(TPoint{argument, target});
-        MinimalArgument = Min(MinimalArgument, argument);
-        MaximalArgument = Max(MaximalArgument, argument);
-    }
-
-    TFeaturesTransformer Solve(const size_t iterationsCount = 100);
-};
-
-class TFastFeaturesTransformerLearner {
-private:
-    ETransformationType TransformationType;
-
-    struct TBucket {
-        TMeanCalculator ArgumentsMean;
-        TMeanCalculator TargetsMean;
-    };
-
+public: 
+    TFeaturesTransformerLearner(const ETransformationType transformationType) 
+        : TransformationType(transformationType) 
+    { 
+    } 
+ 
+    void Add(const float argument, const float target) { 
+        Points.push_back(TPoint{argument, target}); 
+        MinimalArgument = Min(MinimalArgument, argument); 
+        MaximalArgument = Max(MaximalArgument, argument); 
+    } 
+ 
+    TFeaturesTransformer Solve(const size_t iterationsCount = 100); 
+}; 
+ 
+class TFastFeaturesTransformerLearner { 
+private: 
+    ETransformationType TransformationType; 
+ 
+    struct TBucket { 
+        TMeanCalculator ArgumentsMean; 
+        TMeanCalculator TargetsMean; 
+    }; 
+ 
     THashMap<double, TBucket> Buckets;
-    double Step;
-
-public:
-    TFastFeaturesTransformerLearner(const ETransformationType transformationType, const double step = 0.1)
-        : TransformationType(transformationType)
-        , Step(step)
-    {
-    }
-
-    void Add(const float argument, const float target) {
-        TBucket& bucket = Buckets[round(argument / Step)];
-        bucket.ArgumentsMean.Add(argument);
-        bucket.TargetsMean.Add(target);
-    }
-
-    TFeaturesTransformer Solve(const size_t iterationsCount = 100) {
-        TFeaturesTransformerLearner learner(TransformationType);
-        for (auto&& argumentWithBucket : Buckets) {
+    double Step; 
+
+public: 
+    TFastFeaturesTransformerLearner(const ETransformationType transformationType, const double step = 0.1) 
+        : TransformationType(transformationType) 
+        , Step(step) 
+    { 
+    } 
+ 
+    void Add(const float argument, const float target) { 
+        TBucket& bucket = Buckets[round(argument / Step)]; 
+        bucket.ArgumentsMean.Add(argument); 
+        bucket.TargetsMean.Add(target); 
+    } 
+ 
+    TFeaturesTransformer Solve(const size_t iterationsCount = 100) { 
+        TFeaturesTransformerLearner learner(TransformationType); 
+        for (auto&& argumentWithBucket : Buckets) { 
             const TBucket& bucket = argumentWithBucket.second;
-            learner.Add(bucket.ArgumentsMean.GetMean(), bucket.TargetsMean.GetMean());
-        }
-        return learner.Solve(iterationsCount);
-    }
-};
+            learner.Add(bucket.ArgumentsMean.GetMean(), bucket.TargetsMean.GetMean()); 
+        } 
+        return learner.Solve(iterationsCount); 
+    } 
+};
author	alex-sh <alex-sh@yandex-team.ru>	2022-02-10 16:50:03 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:50:03 +0300
commit	3196904c9f5bf7aff7374eeadcb0671589581f61 (patch)
tree	d13114a178799aeb203a4b3b43dd7fb0c4f6975f /library/cpp/linear_regression/linear_regression.h
parent	d154d11651ea533127249184148c3f023e2c6d0a (diff)
download	ydb-3196904c9f5bf7aff7374eeadcb0671589581f61.tar.gz