diff options
author | Vlad Yaroslavlev <vladon@vladon.com> | 2022-02-10 16:46:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:25 +0300 |
commit | 344ea37b4a345701ab0e67de2266a1c1bd7baf2d (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /library/cpp/linear_regression | |
parent | 706b83ed7de5a473436620367af31fc0ceecde07 (diff) | |
download | ydb-344ea37b4a345701ab0e67de2266a1c1bd7baf2d.tar.gz |
Restoring authorship annotation for Vlad Yaroslavlev <vladon@vladon.com>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/linear_regression')
-rw-r--r-- | library/cpp/linear_regression/benchmark/pool.cpp | 10 | ||||
-rw-r--r-- | library/cpp/linear_regression/benchmark/pool.h | 10 | ||||
-rw-r--r-- | library/cpp/linear_regression/linear_regression.cpp | 58 | ||||
-rw-r--r-- | library/cpp/linear_regression/linear_regression.h | 32 | ||||
-rw-r--r-- | library/cpp/linear_regression/linear_regression_ut.cpp | 32 | ||||
-rw-r--r-- | library/cpp/linear_regression/unimodal.cpp | 14 | ||||
-rw-r--r-- | library/cpp/linear_regression/unimodal.h | 12 |
7 files changed, 84 insertions, 84 deletions
diff --git a/library/cpp/linear_regression/benchmark/pool.cpp b/library/cpp/linear_regression/benchmark/pool.cpp index 5e014e575c..7f2c6a7004 100644 --- a/library/cpp/linear_regression/benchmark/pool.cpp +++ b/library/cpp/linear_regression/benchmark/pool.cpp @@ -3,7 +3,7 @@ #include <util/string/cast.h> #include <util/stream/file.h> -TInstance TInstance::FromFeaturesString(const TString& featuresString) { +TInstance TInstance::FromFeaturesString(const TString& featuresString) { TInstance instance; TStringBuf featuresStringBuf(featuresString); @@ -29,7 +29,7 @@ TPool::TCVIterator::TCVIterator(const TPool& parentPool, const size_t foldsCount } void TPool::TCVIterator::ResetShuffle() { - TVector<size_t> instanceNumbers(ParentPool.size()); + TVector<size_t> instanceNumbers(ParentPool.size()); for (size_t instanceNumber = 0; instanceNumber < ParentPool.size(); ++instanceNumber) { instanceNumbers[instanceNumber] = instanceNumber; } @@ -83,9 +83,9 @@ bool TPool::TCVIterator::TakeCurrent() const { return false; } -void TPool::ReadFromFeatures(const TString& featuresPath) { - TFileInput featuresIn(featuresPath); - TString featuresString; +void TPool::ReadFromFeatures(const TString& featuresPath) { + TFileInput featuresIn(featuresPath); + TString featuresString; while (featuresIn.ReadLine(featuresString)) { this->push_back(TInstance::FromFeaturesString(featuresString)); } diff --git a/library/cpp/linear_regression/benchmark/pool.h b/library/cpp/linear_regression/benchmark/pool.h index 88140b7dd1..43288319c8 100644 --- a/library/cpp/linear_regression/benchmark/pool.h +++ b/library/cpp/linear_regression/benchmark/pool.h @@ -1,17 +1,17 @@ #pragma once #include <util/generic/vector.h> -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/random/mersenne.h> #include <util/random/shuffle.h> struct TInstance { - TVector<double> Features; + TVector<double> Features; double Goal; double Weight; - static TInstance FromFeaturesString(const TString& featuresString); + static TInstance FromFeaturesString(const TString& featuresString); }; struct TPool: public TVector<TInstance> { @@ -29,7 +29,7 @@ struct TPool: public TVector<TInstance> { EIteratorType IteratorType; size_t TestFoldNumber; - TVector<size_t> InstanceFoldNumbers; + TVector<size_t> InstanceFoldNumbers; const size_t* Current; TMersenne<ui64> RandomGenerator; @@ -54,7 +54,7 @@ struct TPool: public TVector<TInstance> { bool TakeCurrent() const; }; - void ReadFromFeatures(const TString& featuresPath); + void ReadFromFeatures(const TString& featuresPath); TCVIterator CrossValidationIterator(const size_t foldsCount, const EIteratorType iteratorType) const; TPool InjurePool(const double injureFactir, const double injureOffset) const; diff --git a/library/cpp/linear_regression/linear_regression.cpp b/library/cpp/linear_regression/linear_regression.cpp index 6fc5a40178..150f9d214e 100644 --- a/library/cpp/linear_regression/linear_regression.cpp +++ b/library/cpp/linear_regression/linear_regression.cpp @@ -12,17 +12,17 @@ #include <functional> namespace { - inline void AddFeaturesProduct(const double weight, const TVector<double>& features, TVector<double>& linearizedOLSTriangleMatrix); + inline void AddFeaturesProduct(const double weight, const TVector<double>& features, TVector<double>& linearizedOLSTriangleMatrix); - TVector<double> Solve(const TVector<double>& olsMatrix, const TVector<double>& olsVector); + TVector<double> Solve(const TVector<double>& olsMatrix, const TVector<double>& olsVector); - double SumSquaredErrors(const TVector<double>& olsMatrix, - const TVector<double>& olsVector, - const TVector<double>& solution, + double SumSquaredErrors(const TVector<double>& olsMatrix, + const TVector<double>& olsVector, + const TVector<double>& solution, const double goalsDeviation); } -bool TFastLinearRegressionSolver::Add(const TVector<double>& features, const double goal, const double weight) { +bool TFastLinearRegressionSolver::Add(const TVector<double>& features, const double goal, const double weight) { const size_t featuresCount = features.size(); if (LinearizedOLSMatrix.empty()) { @@ -45,7 +45,7 @@ bool TFastLinearRegressionSolver::Add(const TVector<double>& features, const dou return true; } -bool TLinearRegressionSolver::Add(const TVector<double>& features, const double goal, const double weight) { +bool TLinearRegressionSolver::Add(const TVector<double>& features, const double goal, const double weight) { const size_t featuresCount = features.size(); if (FeatureMeans.empty()) { @@ -114,7 +114,7 @@ bool TLinearRegressionSolver::Add(const TVector<double>& features, const double } TLinearModel TFastLinearRegressionSolver::Solve() const { - TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); + TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); double intercept = 0.; if (!coefficients.empty()) { @@ -126,7 +126,7 @@ TLinearModel TFastLinearRegressionSolver::Solve() const { } TLinearModel TLinearRegressionSolver::Solve() const { - TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); + TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); double intercept = GoalsMean; const size_t featuresCount = OLSVector.size(); @@ -138,12 +138,12 @@ TLinearModel TLinearRegressionSolver::Solve() const { } double TFastLinearRegressionSolver::SumSquaredErrors() const { - const TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); + const TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); return ::SumSquaredErrors(LinearizedOLSMatrix, OLSVector, coefficients, SumSquaredGoals.Get()); } double TLinearRegressionSolver::SumSquaredErrors() const { - const TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); + const TVector<double> coefficients = ::Solve(LinearizedOLSMatrix, OLSVector); return ::SumSquaredErrors(LinearizedOLSMatrix, OLSVector, coefficients, GoalsDeviation); } @@ -194,10 +194,10 @@ double TSLRSolver::SumSquaredErrors(const double regularizationParameter) const namespace { // LDL matrix decomposition, see http://en.wikipedia.org/wiki/Cholesky_decomposition#LDL_decomposition_2 - bool LDLDecomposition(const TVector<double>& linearizedOLSMatrix, + bool LDLDecomposition(const TVector<double>& linearizedOLSMatrix, const double regularizationThreshold, const double regularizationParameter, - TVector<double>& decompositionTrace, + TVector<double>& decompositionTrace, TVector<TVector<double>>& decompositionMatrix) { const size_t featuresCount = decompositionTrace.size(); @@ -206,7 +206,7 @@ namespace { double& decompositionTraceElement = decompositionTrace[rowNumber]; decompositionTraceElement = linearizedOLSMatrix[olsMatrixElementIdx] + regularizationParameter; - TVector<double>& decompositionRow = decompositionMatrix[rowNumber]; + TVector<double>& decompositionRow = decompositionMatrix[rowNumber]; for (size_t i = 0; i < rowNumber; ++i) { decompositionTraceElement -= decompositionRow[i] * decompositionRow[i] * decompositionTrace[i]; } @@ -218,7 +218,7 @@ namespace { ++olsMatrixElementIdx; decompositionRow[rowNumber] = 1.; for (size_t columnNumber = rowNumber + 1; columnNumber < featuresCount; ++columnNumber) { - TVector<double>& secondDecompositionRow = decompositionMatrix[columnNumber]; + TVector<double>& secondDecompositionRow = decompositionMatrix[columnNumber]; double& decompositionMatrixElement = secondDecompositionRow[rowNumber]; decompositionMatrixElement = linearizedOLSMatrix[olsMatrixElementIdx]; @@ -237,8 +237,8 @@ namespace { return true; } - void LDLDecomposition(const TVector<double>& linearizedOLSMatrix, - TVector<double>& decompositionTrace, + void LDLDecomposition(const TVector<double>& linearizedOLSMatrix, + TVector<double>& decompositionTrace, TVector<TVector<double>>& decompositionMatrix) { const double regularizationThreshold = 1e-5; double regularizationParameter = 0.; @@ -253,16 +253,16 @@ namespace { } TVector<double> SolveLower(const TVector<TVector<double>>& decompositionMatrix, - const TVector<double>& decompositionTrace, + const TVector<double>& decompositionTrace, const TVector<double>& olsVector) { const size_t featuresCount = olsVector.size(); - TVector<double> solution(featuresCount); + TVector<double> solution(featuresCount); for (size_t featureNumber = 0; featureNumber < featuresCount; ++featureNumber) { double& solutionElement = solution[featureNumber]; solutionElement = olsVector[featureNumber]; - const TVector<double>& decompositionRow = decompositionMatrix[featureNumber]; + const TVector<double>& decompositionRow = decompositionMatrix[featureNumber]; for (size_t i = 0; i < featureNumber; ++i) { solutionElement -= solution[i] * decompositionRow[i]; } @@ -279,12 +279,12 @@ namespace { const TVector<double>& lowerSolution) { const size_t featuresCount = lowerSolution.size(); - TVector<double> solution(featuresCount); + TVector<double> solution(featuresCount); for (size_t featureNumber = featuresCount; featureNumber > 0; --featureNumber) { double& solutionElement = solution[featureNumber - 1]; solutionElement = lowerSolution[featureNumber - 1]; - const TVector<double>& decompositionRow = decompositionMatrix[featureNumber - 1]; + const TVector<double>& decompositionRow = decompositionMatrix[featureNumber - 1]; for (size_t i = featureNumber; i < featuresCount; ++i) { solutionElement -= solution[i] * decompositionRow[i]; } @@ -293,10 +293,10 @@ namespace { return solution; } - TVector<double> Solve(const TVector<double>& olsMatrix, const TVector<double>& olsVector) { + TVector<double> Solve(const TVector<double>& olsMatrix, const TVector<double>& olsVector) { const size_t featuresCount = olsVector.size(); - TVector<double> decompositionTrace(featuresCount); + TVector<double> decompositionTrace(featuresCount); TVector<TVector<double>> decompositionMatrix(featuresCount, TVector<double>(featuresCount)); LDLDecomposition(olsMatrix, decompositionTrace, decompositionMatrix); @@ -304,9 +304,9 @@ namespace { return SolveUpper(decompositionMatrix, SolveLower(decompositionMatrix, decompositionTrace, olsVector)); } - double SumSquaredErrors(const TVector<double>& olsMatrix, - const TVector<double>& olsVector, - const TVector<double>& solution, + double SumSquaredErrors(const TVector<double>& olsMatrix, + const TVector<double>& olsVector, + const TVector<double>& solution, const double goalsDeviation) { const size_t featuresCount = olsVector.size(); @@ -325,7 +325,7 @@ namespace { } #ifdef _sse2_ - inline void AddFeaturesProduct(const double weight, const TVector<double>& features, TVector<double>& linearizedOLSTriangleMatrix) { + inline void AddFeaturesProduct(const double weight, const TVector<double>& features, TVector<double>& linearizedOLSTriangleMatrix) { const double* leftFeature = features.data(); const double* featuresEnd = features.data() + features.size(); double* matrixElement = linearizedOLSTriangleMatrix.data(); @@ -351,7 +351,7 @@ namespace { linearizedOLSTriangleMatrix.back() += weight; } #else - inline void AddFeaturesProduct(const double weight, const TVector<double>& features, TVector<double>& linearizedTriangleMatrix) { + inline void AddFeaturesProduct(const double weight, const TVector<double>& features, TVector<double>& linearizedTriangleMatrix) { const double* leftFeature = features.data(); const double* featuresEnd = features.data() + features.size(); double* matrixElement = linearizedTriangleMatrix.data(); diff --git a/library/cpp/linear_regression/linear_regression.h b/library/cpp/linear_regression/linear_regression.h index f1596fb024..e57de5ff6c 100644 --- a/library/cpp/linear_regression/linear_regression.h +++ b/library/cpp/linear_regression/linear_regression.h @@ -13,11 +13,11 @@ class TFastLinearRegressionSolver { private: TKahanAccumulator<double> SumSquaredGoals; - TVector<double> LinearizedOLSMatrix; - TVector<double> OLSVector; + TVector<double> LinearizedOLSMatrix; + TVector<double> OLSVector; public: - bool Add(const TVector<double>& features, const double goal, const double weight = 1.); + bool Add(const TVector<double>& features, const double goal, const double weight = 1.); TLinearModel Solve() const; double SumSquaredErrors() const; }; @@ -27,17 +27,17 @@ private: double GoalsMean = 0.; double GoalsDeviation = 0.; - TVector<double> FeatureMeans; - TVector<double> LastMeans; - TVector<double> NewMeans; - TVector<double> LinearizedOLSMatrix; + TVector<double> FeatureMeans; + TVector<double> LastMeans; + TVector<double> NewMeans; + TVector<double> LinearizedOLSMatrix; - TVector<double> OLSVector; + TVector<double> OLSVector; TKahanAccumulator<double> SumWeights; public: - bool Add(const TVector<double>& features, const double goal, const double weight = 1.); + bool Add(const TVector<double>& features, const double goal, const double weight = 1.); TLinearModel Solve() const; double SumSquaredErrors() const; }; @@ -145,12 +145,12 @@ public: bool Add(const double* featuresBegin, const double* featuresEnd, const double* goalsBegin); bool Add(const double* featuresBegin, const double* featuresEnd, const double* goalsBegin, const double* weightsBegin); - bool Add(const TVector<double>& features, const TVector<double>& goals) { + bool Add(const TVector<double>& features, const TVector<double>& goals) { Y_ASSERT(features.size() == goals.size()); return Add(features.data(), features.data() + features.size(), goals.data()); } - bool Add(const TVector<double>& features, const TVector<double>& goals, const TVector<double>& weights) { + bool Add(const TVector<double>& features, const TVector<double>& goals, const TVector<double>& weights) { Y_ASSERT(features.size() == goals.size() && features.size() == weights.size()); return Add(features.data(), features.data() + features.size(), goals.data(), weights.data()); } @@ -177,10 +177,10 @@ public: template <typename TSLRSolverType> class TTypedBestSLRSolver { private: - TVector<TSLRSolverType> SLRSolvers; + TVector<TSLRSolverType> SLRSolvers; public: - bool Add(const TVector<double>& features, const double goal, const double weight = 1.) { + bool Add(const TVector<double>& features, const double goal, const double weight = 1.) { if (SLRSolvers.empty()) { SLRSolvers.resize(features.size()); } @@ -200,7 +200,7 @@ public: } } - TVector<double> coefficients(SLRSolvers.size()); + TVector<double> coefficients(SLRSolvers.size()); double intercept = 0.0; if (bestSolver) { bestSolver->Solve(coefficients[bestSolver - SLRSolvers.begin()], intercept, regularizationParameter); @@ -289,7 +289,7 @@ private: float MaximalArgument = Min<float>(); ETransformationType TransformationType; - TVector<TPoint> Points; + TVector<TPoint> Points; public: TFeaturesTransformerLearner(const ETransformationType transformationType) @@ -315,7 +315,7 @@ private: TMeanCalculator TargetsMean; }; - THashMap<double, TBucket> Buckets; + THashMap<double, TBucket> Buckets; double Step; public: diff --git a/library/cpp/linear_regression/linear_regression_ut.cpp b/library/cpp/linear_regression/linear_regression_ut.cpp index 6915c3821d..e71a16b67a 100644 --- a/library/cpp/linear_regression/linear_regression_ut.cpp +++ b/library/cpp/linear_regression/linear_regression_ut.cpp @@ -15,8 +15,8 @@ namespace { Y_UNIT_TEST_SUITE(TLinearRegressionTest) { Y_UNIT_TEST(MeanAndDeviationTest) { - TVector<double> arguments; - TVector<double> weights; + TVector<double> arguments; + TVector<double> weights; const size_t argumentsCount = 100; for (size_t i = 0; i < argumentsCount; ++i) { @@ -78,9 +78,9 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) { } Y_UNIT_TEST(CovariationTest) { - TVector<double> firstValues; - TVector<double> secondValues; - TVector<double> weights; + TVector<double> firstValues; + TVector<double> secondValues; + TVector<double> weights; const size_t argumentsCount = 100; for (size_t i = 0; i < argumentsCount; ++i) { @@ -130,9 +130,9 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) { template <typename TSLRSolverType> void SLRTest() { - TVector<double> arguments; - TVector<double> weights; - TVector<double> goals; + TVector<double> arguments; + TVector<double> weights; + TVector<double> goals; const double factor = 2.; const double intercept = 105.; @@ -194,18 +194,18 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) { const size_t instancesCount = 10000; const double randomError = 0.01; - TVector<double> coefficients; + TVector<double> coefficients; for (size_t featureNumber = 0; featureNumber < featuresCount; ++featureNumber) { coefficients.push_back(featureNumber); } const double intercept = 10; TVector<TVector<double>> featuresMatrix; - TVector<double> goals; - TVector<double> weights; + TVector<double> goals; + TVector<double> weights; for (size_t instanceNumber = 0; instanceNumber < instancesCount; ++instanceNumber) { - TVector<double> features; + TVector<double> features; for (size_t featureNumber = 0; featureNumber < featuresCount; ++featureNumber) { features.push_back(RandomNumber<double>()); } @@ -240,8 +240,8 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) { } void TransformationTest(const ETransformationType transformationType, const size_t pointsCount) { - TVector<float> arguments; - TVector<float> goals; + TVector<float> arguments; + TVector<float> goals; const double regressionFactor = 10.; const double regressionIntercept = 100; @@ -300,8 +300,8 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) { } Y_UNIT_TEST(ResetCalculatorTest) { - TVector<double> arguments; - TVector<double> weights; + TVector<double> arguments; + TVector<double> weights; const double eps = 1e-10; const size_t argumentsCount = 100; diff --git a/library/cpp/linear_regression/unimodal.cpp b/library/cpp/linear_regression/unimodal.cpp index bbca9e9463..729011012a 100644 --- a/library/cpp/linear_regression/unimodal.cpp +++ b/library/cpp/linear_regression/unimodal.cpp @@ -22,7 +22,7 @@ namespace { double SSE = 0.; - TOptimizationState(const TVector<double>& values) { + TOptimizationState(const TVector<double>& values) { SSE = InnerProduct(values, values); } @@ -44,7 +44,7 @@ double TGreedyParams::Point(const size_t step) const { return LowerBound * (1 - alpha) + UpperBound * alpha; } -double MakeUnimodal(TVector<double>& values, const TOptimizationParams& optimizationParams) { +double MakeUnimodal(TVector<double>& values, const TOptimizationParams& optimizationParams) { TOptimizationState state(values); TOptimizationState bestState = state; @@ -80,19 +80,19 @@ double MakeUnimodal(TVector<double>& values, const TOptimizationParams& optimiza return determination; } -double MakeUnimodal(TVector<double>& values) { +double MakeUnimodal(TVector<double>& values) { return MakeUnimodal(values, TOptimizationParams::Default(values)); } -double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments, const TOptimizationParams& optimizationParams) { +double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments, const TOptimizationParams& optimizationParams) { Y_ASSERT(values.size() == arguments.size()); - TMap<double, double> mapping; + TMap<double, double> mapping; for (size_t i = 0; i < values.size(); ++i) { mapping[arguments[i]] = values[i]; } - TVector<double> preparedValues; + TVector<double> preparedValues; preparedValues.reserve(mapping.size()); for (auto&& argWithValue : mapping) { @@ -113,6 +113,6 @@ double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments, c return result; } -double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments) { +double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments) { return MakeUnimodal(values, arguments, TOptimizationParams::Default(values, arguments)); } diff --git a/library/cpp/linear_regression/unimodal.h b/library/cpp/linear_regression/unimodal.h index 21d6ae8782..e11b1118f6 100644 --- a/library/cpp/linear_regression/unimodal.h +++ b/library/cpp/linear_regression/unimodal.h @@ -21,7 +21,7 @@ struct TOptimizationParams { TOptimizationParams() = default; - static TOptimizationParams Default(const TVector<double>& values) { + static TOptimizationParams Default(const TVector<double>& values) { TOptimizationParams optimizationParams; optimizationParams.ModeParams.LowerBound = 0; @@ -35,7 +35,7 @@ struct TOptimizationParams { return optimizationParams; } - static TOptimizationParams Default(const TVector<double>& values, const TVector<double>& arguments) { + static TOptimizationParams Default(const TVector<double>& values, const TVector<double>& arguments) { Y_ASSERT(values.size() == arguments.size()); TOptimizationParams optimizationParams; @@ -52,8 +52,8 @@ struct TOptimizationParams { } }; -double MakeUnimodal(TVector<double>& values, const TOptimizationParams& optimizationParams); -double MakeUnimodal(TVector<double>& values); +double MakeUnimodal(TVector<double>& values, const TOptimizationParams& optimizationParams); +double MakeUnimodal(TVector<double>& values); -double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments, const TOptimizationParams& optimizationParams); -double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments); +double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments, const TOptimizationParams& optimizationParams); +double MakeUnimodal(TVector<double>& values, const TVector<double>& arguments); |