author     Anton Samokhvalov <pg83@yandex.ru>             2022-02-10 16:45:17 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>   2022-02-10 16:45:17 +0300
commit     d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree       dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/linear_regression
parent     72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download   ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/linear_regression')
-rw-r--r--  library/cpp/linear_regression/benchmark/pool.cpp        10
-rw-r--r--  library/cpp/linear_regression/benchmark/pool.h          10
-rw-r--r--  library/cpp/linear_regression/linear_model.h            62
-rw-r--r--  library/cpp/linear_regression/linear_regression.cpp     38
-rw-r--r--  library/cpp/linear_regression/linear_regression.h       46
-rw-r--r--  library/cpp/linear_regression/linear_regression_ut.cpp   6
-rw-r--r--  library/cpp/linear_regression/welford.h                 10
7 files changed, 91 insertions, 91 deletions
diff --git a/library/cpp/linear_regression/benchmark/pool.cpp b/library/cpp/linear_regression/benchmark/pool.cpp
index 12b547be89..7f2c6a7004 100644
--- a/library/cpp/linear_regression/benchmark/pool.cpp
+++ b/library/cpp/linear_regression/benchmark/pool.cpp
@@ -51,7 +51,7 @@ bool TPool::TCVIterator::IsValid() const {
     return Current != InstanceFoldNumbers.end();
 }

-const TInstance& TPool::TCVIterator::operator*() const {
+const TInstance& TPool::TCVIterator::operator*() const {
     return ParentPool[Current - InstanceFoldNumbers.begin()];
 }

@@ -75,10 +75,10 @@ void TPool::TCVIterator::Advance() {

 bool TPool::TCVIterator::TakeCurrent() const {
     switch (IteratorType) {
-        case LearnIterator:
-            return *Current != TestFoldNumber;
-        case TestIterator:
-            return *Current == TestFoldNumber;
+        case LearnIterator:
+            return *Current != TestFoldNumber;
+        case TestIterator:
+            return *Current == TestFoldNumber;
     }
     return false;
 }
diff --git a/library/cpp/linear_regression/benchmark/pool.h b/library/cpp/linear_regression/benchmark/pool.h
index 4dcf7d7e9e..43288319c8 100644
--- a/library/cpp/linear_regression/benchmark/pool.h
+++ b/library/cpp/linear_regression/benchmark/pool.h
@@ -14,7 +14,7 @@ struct TInstance {
     static TInstance FromFeaturesString(const TString& featuresString);
 };

-struct TPool: public TVector<TInstance> {
+struct TPool: public TVector<TInstance> {
     enum EIteratorType {
         LearnIterator,
         TestIterator,
@@ -33,7 +33,7 @@ struct TPool: public TVector<TInstance> {
         const size_t* Current;

         TMersenne<ui64> RandomGenerator;
-
+
     public:
         TCVIterator(const TPool& parentPool,
                     const size_t foldsCount,
@@ -45,10 +45,10 @@ struct TPool: public TVector<TInstance> {

         bool IsValid() const;

-        const TInstance& operator*() const;
-        const TInstance* operator->() const;
+        const TInstance& operator*() const;
+        const TInstance* operator->() const;
         TPool::TCVIterator& operator++();
-
+
     private:
         void Advance();
         bool TakeCurrent() const;
diff --git a/library/cpp/linear_regression/linear_model.h b/library/cpp/linear_regression/linear_model.h
index 551bfe36dc..8bb050cff7 100644
--- a/library/cpp/linear_regression/linear_model.h
+++ b/library/cpp/linear_regression/linear_model.h
@@ -8,35 +8,35 @@
 #include <utility>

 class TLinearModel {
-private:
-    TVector<double> Coefficients;
-    double Intercept;
-
-public:
-    Y_SAVELOAD_DEFINE(Coefficients, Intercept);
-
-    TLinearModel(TVector<double>&& coefficients, const double intercept)
-        : Coefficients(std::move(coefficients))
-        , Intercept(intercept)
-    {
-    }
-
-    explicit TLinearModel(size_t featuresCount = 0)
-        : Coefficients(featuresCount)
-        , Intercept(0.)
-    {
-    }
-
-    const TVector<double>& GetCoefficients() const {
-        return Coefficients;
-    }
-
-    double GetIntercept() const {
-        return Intercept;
-    }
-
-    template <typename T>
-    double Prediction(const TVector<T>& features) const {
-        return InnerProduct(Coefficients, features, Intercept);
-    }
+private:
+    TVector<double> Coefficients;
+    double Intercept;
+
+public:
+    Y_SAVELOAD_DEFINE(Coefficients, Intercept);
+
+    TLinearModel(TVector<double>&& coefficients, const double intercept)
+        : Coefficients(std::move(coefficients))
+        , Intercept(intercept)
+    {
+    }
+
+    explicit TLinearModel(size_t featuresCount = 0)
+        : Coefficients(featuresCount)
+        , Intercept(0.)
+    {
+    }
+
+    const TVector<double>& GetCoefficients() const {
+        return Coefficients;
+    }
+
+    double GetIntercept() const {
+        return Intercept;
+    }
+
+    template <typename T>
+    double Prediction(const TVector<T>& features) const {
+        return InnerProduct(Coefficients, features, Intercept);
+    }
 };
diff --git a/library/cpp/linear_regression/linear_regression.cpp b/library/cpp/linear_regression/linear_regression.cpp
index 18ae939893..150f9d214e 100644
--- a/library/cpp/linear_regression/linear_regression.cpp
+++ b/library/cpp/linear_regression/linear_regression.cpp
@@ -4,8 +4,8 @@
 #include <util/generic/ymath.h>

 #ifdef _sse2_
-#include <emmintrin.h>
-#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <xmmintrin.h>
 #endif

 #include <algorithm>
@@ -68,8 +68,8 @@ bool TLinearRegressionSolver::Add(const TVector<double>& features, const double
         LastMeans[featureNumber] = weight * (feature - featureMean);
         featureMean += weight * (feature - featureMean) / SumWeights.Get();
-        NewMeans[featureNumber] = feature - featureMean;
-        ;
+        NewMeans[featureNumber] = feature - featureMean;
+        ;
     }

     double* olsMatrixElement = LinearizedOLSMatrix.data();
@@ -169,7 +169,7 @@ bool TSLRSolver::Add(const double feature, const double goal, const double weigh

 bool TSLRSolver::Add(const double* featuresBegin,
                      const double* featuresEnd,
-                     const double* goalsBegin) {
+                     const double* goalsBegin) {
     for (; featuresBegin != featuresEnd; ++featuresBegin, ++goalsBegin) {
         Add(*featuresBegin, *goalsBegin);
     }
@@ -178,7 +178,7 @@ bool TSLRSolver::Add(const double* featuresBegin,
 bool TSLRSolver::Add(const double* featuresBegin,
                      const double* featuresEnd,
                      const double* goalsBegin,
-                     const double* weightsBegin) {
+                     const double* weightsBegin) {
     for (; featuresBegin != featuresEnd; ++featuresBegin, ++goalsBegin, ++weightsBegin) {
         Add(*featuresBegin, *goalsBegin, *weightsBegin);
     }
@@ -198,7 +198,7 @@ namespace {
                           const double regularizationThreshold,
                           const double regularizationParameter,
                           TVector<double>& decompositionTrace,
-                          TVector<TVector<double>>& decompositionMatrix) {
+                          TVector<TVector<double>>& decompositionMatrix) {
         const size_t featuresCount = decompositionTrace.size();

         size_t olsMatrixElementIdx = 0;
@@ -239,7 +239,7 @@ namespace {

     void LDLDecomposition(const TVector<double>& linearizedOLSMatrix,
                           TVector<double>& decompositionTrace,
-                          TVector<TVector<double>>& decompositionMatrix) {
+                          TVector<TVector<double>>& decompositionMatrix) {
         const double regularizationThreshold = 1e-5;
         double regularizationParameter = 0.;

@@ -247,14 +247,14 @@ namespace {
                                       regularizationThreshold,
                                       regularizationParameter,
                                       decompositionTrace,
-                                      decompositionMatrix)) {
+                                      decompositionMatrix)) {
             regularizationParameter = regularizationParameter ? 2 * regularizationParameter : 1e-5;
         }
     }

-    TVector<double> SolveLower(const TVector<TVector<double>>& decompositionMatrix,
+    TVector<double> SolveLower(const TVector<TVector<double>>& decompositionMatrix,
                                const TVector<double>& decompositionTrace,
-                               const TVector<double>& olsVector) {
+                               const TVector<double>& olsVector) {
         const size_t featuresCount = olsVector.size();

         TVector<double> solution(featuresCount);
@@ -275,8 +275,8 @@ namespace {
         return solution;
     }

-    TVector<double> SolveUpper(const TVector<TVector<double>>& decompositionMatrix,
-                               const TVector<double>& lowerSolution) {
+    TVector<double> SolveUpper(const TVector<TVector<double>>& decompositionMatrix,
+                               const TVector<double>& lowerSolution) {
         const size_t featuresCount = lowerSolution.size();

         TVector<double> solution(featuresCount);
@@ -297,7 +297,7 @@ namespace {
         const size_t featuresCount = olsVector.size();

         TVector<double> decompositionTrace(featuresCount);
-        TVector<TVector<double>> decompositionMatrix(featuresCount, TVector<double>(featuresCount));
+        TVector<TVector<double>> decompositionMatrix(featuresCount, TVector<double>(featuresCount));

         LDLDecomposition(olsMatrix, decompositionTrace, decompositionMatrix);

@@ -307,7 +307,7 @@ namespace {
     double SumSquaredErrors(const TVector<double>& olsMatrix,
                             const TVector<double>& olsVector,
                             const TVector<double>& solution,
-                            const double goalsDeviation) {
+                            const double goalsDeviation) {
         const size_t featuresCount = olsVector.size();

         double sumSquaredErrors = goalsDeviation;
@@ -335,7 +335,7 @@ namespace {
         for (; leftFeature != featuresEnd; ++leftFeature, ++matrixElement) {
             const double weightedFeature = weight * *leftFeature;
             const double* rightFeature = leftFeature;
-            __m128d wf = {weightedFeature, weightedFeature};
+            __m128d wf = {weightedFeature, weightedFeature};
             for (size_t i = 0; i < unaligned; ++i, ++rightFeature, ++matrixElement) {
                 *matrixElement += weightedFeature * *rightFeature;
             }
@@ -397,10 +397,10 @@ namespace {
 TFeaturesTransformer TFeaturesTransformerLearner::Solve(const size_t iterationsCount /* = 100 */) {
     TTransformationParameters transformationParameters;

-    auto updateParameter = [this, &transformationParameters](double TTransformationParameters::*parameter,
+    auto updateParameter = [this, &transformationParameters](double TTransformationParameters::*parameter,
                                                              const double left,
-                                                             const double right) {
-        auto evalParameter = [this, &transformationParameters, parameter](double parameterValue) {
+                                                             const double right) {
+        auto evalParameter = [this, &transformationParameters, parameter](double parameterValue) {
             transformationParameters.*parameter = parameterValue;

             TFeaturesTransformer transformer(TransformationType, transformationParameters);
diff --git a/library/cpp/linear_regression/linear_regression.h b/library/cpp/linear_regression/linear_regression.h
index 4a2684f506..e57de5ff6c 100644
--- a/library/cpp/linear_regression/linear_regression.h
+++ b/library/cpp/linear_regression/linear_regression.h
@@ -15,7 +15,7 @@ private:
     TVector<double> LinearizedOLSMatrix;
     TVector<double> OLSVector;
-
+
 public:
     bool Add(const TVector<double>& features, const double goal, const double weight = 1.);
     TLinearModel Solve() const;
@@ -35,7 +35,7 @@ private:
     TVector<double> OLSVector;

     TKahanAccumulator<double> SumWeights;
-
+
 public:
     bool Add(const TVector<double>& features, const double goal, const double weight = 1.);
     TLinearModel Solve() const;
@@ -54,7 +54,7 @@ private:
     TStoreType SumProducts = TStoreType();
     TStoreType SumWeights = TStoreType();
-
+
 public:
     bool Add(const double feature, const double goal, const double weight = 1.) {
         SumFeatures += feature * weight;
@@ -72,7 +72,7 @@ public:

     template <typename TFloatType>
     void Solve(TFloatType& factor, TFloatType& intercept, const double regularizationParameter = 0.1) const {
-        if (!(double)SumGoals) {
+        if (!(double)SumGoals) {
             factor = intercept = TFloatType();
             return;
         }
@@ -82,20 +82,20 @@ public:

         if (!featuresDeviation) {
             factor = TFloatType();
-            intercept = (double)SumGoals / (double)SumWeights;
+            intercept = (double)SumGoals / (double)SumWeights;
             return;
         }

         factor = productsDeviation / (featuresDeviation + regularizationParameter);
-        intercept = (double)SumGoals / (double)SumWeights - factor * (double)SumFeatures / (double)SumWeights;
+        intercept = (double)SumGoals / (double)SumWeights - factor * (double)SumFeatures / (double)SumWeights;
     }

     double SumSquaredErrors(const double regularizationParameter = 0.1) const {
-        if (!(double)SumWeights) {
+        if (!(double)SumWeights) {
             return 0.;
         }

-        const double sumGoalSquaredDeviations = (double)SumSquaredGoals - (double)SumGoals / (double)SumWeights * (double)SumGoals;
+        const double sumGoalSquaredDeviations = (double)SumSquaredGoals - (double)SumGoals / (double)SumWeights * (double)SumGoals;

         double productsDeviation, featuresDeviation;
         SetupSolutionFactors(productsDeviation, featuresDeviation);
@@ -108,24 +108,24 @@ public:
         const double sumSquaredErrors = factor * factor * featuresDeviation - 2 * factor * productsDeviation + sumGoalSquaredDeviations;
         return Max(0., sumSquaredErrors);
     }
-
+
 private:
     void SetupSolutionFactors(double& productsDeviation, double& featuresDeviation) const {
-        if (!(double)SumWeights) {
+        if (!(double)SumWeights) {
             productsDeviation = featuresDeviation = 0.;
             return;
         }

-        featuresDeviation = (double)SumSquaredFeatures - (double)SumFeatures / (double)SumWeights * (double)SumFeatures;
+        featuresDeviation = (double)SumSquaredFeatures - (double)SumFeatures / (double)SumWeights * (double)SumFeatures;
         if (!featuresDeviation) {
             return;
         }

-        productsDeviation = (double)SumProducts - (double)SumFeatures / (double)SumWeights * (double)SumGoals;
+        productsDeviation = (double)SumProducts - (double)SumFeatures / (double)SumWeights * (double)SumGoals;
     }
 };

 using TFastSLRSolver = TTypedFastSLRSolver<double>;
-using TKahanSLRSolver = TTypedFastSLRSolver<TKahanAccumulator<double>>;
+using TKahanSLRSolver = TTypedFastSLRSolver<TKahanAccumulator<double>>;

 class TSLRSolver {
 private:
@@ -138,7 +138,7 @@ private:
     TKahanAccumulator<double> SumWeights;

     double Covariation = 0.;
-
+
 public:
     bool Add(const double feature, const double goal, const double weight = 1.);
@@ -178,7 +178,7 @@ template <typename TSLRSolverType>
 class TTypedBestSLRSolver {
 private:
     TVector<TSLRSolverType> SLRSolvers;
-
+
 public:
     bool Add(const TVector<double>& features, const double goal, const double weight = 1.) {
         if (SLRSolvers.empty()) {
@@ -240,16 +240,16 @@ struct TTransformationParameters {
     double FeatureNormalizer = 1.;

     Y_SAVELOAD_DEFINE(RegressionFactor,
-                      RegressionIntercept,
-                      FeatureOffset,
-                      FeatureNormalizer);
+                      RegressionIntercept,
+                      FeatureOffset,
+                      FeatureNormalizer);
 };

 class TFeaturesTransformer {
 private:
     ETransformationType TransformationType;
     TTransformationParameters TransformationParameters;
-
+
 public:
     Y_SAVELOAD_DEFINE(TransformationType, TransformationParameters);
@@ -264,10 +264,10 @@ public:

     double Transformation(const double value) const {
         switch (TransformationType) {
-            case ETransformationType::TT_IDENTITY: {
+            case ETransformationType::TT_IDENTITY: {
                 return value;
             }
-            case ETransformationType::TT_SIGMA: {
+            case ETransformationType::TT_SIGMA: {
                 const double valueWithoutOffset = value - TransformationParameters.FeatureOffset;
                 const double transformedValue = valueWithoutOffset / (fabs(valueWithoutOffset) + TransformationParameters.FeatureNormalizer);
                 return TransformationParameters.RegressionIntercept + TransformationParameters.RegressionFactor * transformedValue;
@@ -290,7 +290,7 @@ private:
     ETransformationType TransformationType;

     TVector<TPoint> Points;
-
+
 public:
     TFeaturesTransformerLearner(const ETransformationType transformationType)
         : TransformationType(transformationType)
@@ -317,7 +317,7 @@ private:
     THashMap<double, TBucket> Buckets;

     double Step;
-
+
 public:
     TFastFeaturesTransformerLearner(const ETransformationType transformationType, const double step = 0.1)
         : TransformationType(transformationType)
diff --git a/library/cpp/linear_regression/linear_regression_ut.cpp b/library/cpp/linear_regression/linear_regression_ut.cpp
index d6e179bc0c..e71a16b67a 100644
--- a/library/cpp/linear_regression/linear_regression_ut.cpp
+++ b/library/cpp/linear_regression/linear_regression_ut.cpp
@@ -3,7 +3,7 @@
 #include <util/generic/vector.h>
 #include <util/generic/ymath.h>

-#include <util/random/random.h>
+#include <util/random/random.h>

 #include <util/system/defaults.h>

@@ -200,14 +200,14 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) {
         }
         const double intercept = 10;

-        TVector<TVector<double>> featuresMatrix;
+        TVector<TVector<double>> featuresMatrix;
         TVector<double> goals;
         TVector<double> weights;

         for (size_t instanceNumber = 0; instanceNumber < instancesCount; ++instanceNumber) {
             TVector<double> features;
             for (size_t featureNumber = 0; featureNumber < featuresCount; ++featureNumber) {
-                features.push_back(RandomNumber<double>());
+                features.push_back(RandomNumber<double>());
             }
             featuresMatrix.push_back(features);
diff --git a/library/cpp/linear_regression/welford.h b/library/cpp/linear_regression/welford.h
index 89b5ce39ea..ee865d6693 100644
--- a/library/cpp/linear_regression/welford.h
+++ b/library/cpp/linear_regression/welford.h
@@ -9,7 +9,7 @@ class TMeanCalculator {
 private:
     double Mean = 0.;
     TKahanAccumulator<double> SumWeights;
-
+
 public:
     Y_SAVELOAD_DEFINE(Mean, SumWeights);

@@ -20,11 +20,11 @@ public:
     double GetSumWeights() const;
     void Reset();

-    bool operator<(const TMeanCalculator& other) const {
+    bool operator<(const TMeanCalculator& other) const {
         return Mean < other.Mean;
     }

-    bool operator>(const TMeanCalculator& other) const {
+    bool operator>(const TMeanCalculator& other) const {
         return Mean > other.Mean;
     }
 };
@@ -38,7 +38,7 @@ private:
     double SecondValueMean = 0.;

     TKahanAccumulator<double> SumWeights;
-
+
 public:
     Y_SAVELOAD_DEFINE(Covariation, FirstValueMean, SecondValueMean, SumWeights);
@@ -60,7 +60,7 @@ class TDeviationCalculator {
 private:
     double Deviation = 0.;
     TMeanCalculator MeanCalculator;
-
+
 public:
     Y_SAVELOAD_DEFINE(Deviation, MeanCalculator);
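
The hunks above only shuffle trailing whitespace, so the interfaces they touch are unchanged. For orientation, here is a minimal usage sketch of the solver and model classes whose declarations appear in this diff. Only the Add/Solve/Prediction signatures are taken from linear_regression.h and linear_model.h above; the include paths and the wrapper function are illustrative assumptions, not part of this commit.

// Hypothetical usage sketch (not part of this commit): fit a model with
// TLinearRegressionSolver and apply it with TLinearModel::Prediction().
// Include paths assume an Arcadia-style build.
#include <library/cpp/linear_regression/linear_regression.h>
#include <library/cpp/linear_regression/linear_model.h>

#include <util/generic/vector.h>

double FitAndPredict(const TVector<TVector<double>>& featuresMatrix,
                     const TVector<double>& goals,
                     const TVector<double>& newInstance) {
    // Accumulate training instances; the third Add() argument is a weight
    // that defaults to 1.
    TLinearRegressionSolver solver;
    for (size_t i = 0; i < featuresMatrix.size(); ++i) {
        solver.Add(featuresMatrix[i], goals[i]);
    }

    // Solve() produces a TLinearModel; Prediction() computes
    // InnerProduct(Coefficients, features, Intercept), as shown in linear_model.h.
    const TLinearModel model = solver.Solve();
    return model.Prediction(newInstance);
}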