about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/linear_regression
diff options
context:
space:
mode:
author Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:17 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:17 +0300
commit d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/linear_regression
parent 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/linear_regression')
-rw-r--r-- library/cpp/linear_regression/benchmark/pool.cpp 10
-rw-r--r-- library/cpp/linear_regression/benchmark/pool.h 10
-rw-r--r-- library/cpp/linear_regression/linear_model.h 62
-rw-r--r-- library/cpp/linear_regression/linear_regression.cpp 38
-rw-r--r-- library/cpp/linear_regression/linear_regression.h 46
-rw-r--r-- library/cpp/linear_regression/linear_regression_ut.cpp 6
-rw-r--r-- library/cpp/linear_regression/welford.h 10
7 files changed, 91 insertions, 91 deletions
diff --git a/library/cpp/linear_regression/benchmark/pool.cpp b/library/cpp/linear_regression/benchmark/pool.cpp
index 12b547be89..7f2c6a7004 100644
--- a/library/cpp/linear_regression/benchmark/pool.cpp
+++ b/library/cpp/linear_regression/benchmark/pool.cpp
@@ -51,7 +51,7 @@ bool TPool::TCVIterator::IsValid() const {
return Current != InstanceFoldNumbers.end();
}
-const TInstance& TPool::TCVIterator::operator*() const {
+const TInstance& TPool::TCVIterator::operator*() const {
return ParentPool[Current - InstanceFoldNumbers.begin()];
}
@@ -75,10 +75,10 @@ void TPool::TCVIterator::Advance() {
bool TPool::TCVIterator::TakeCurrent() const {
switch (IteratorType) {
- case LearnIterator:
- return *Current != TestFoldNumber;
- case TestIterator:
- return *Current == TestFoldNumber;
+ case LearnIterator:
+ return *Current != TestFoldNumber;
+ case TestIterator:
+ return *Current == TestFoldNumber;
}
return false;
}
diff --git a/library/cpp/linear_regression/benchmark/pool.h b/library/cpp/linear_regression/benchmark/pool.h
index 4dcf7d7e9e..43288319c8 100644
--- a/library/cpp/linear_regression/benchmark/pool.h
+++ b/library/cpp/linear_regression/benchmark/pool.h
@@ -14,7 +14,7 @@ struct TInstance {
static TInstance FromFeaturesString(const TString& featuresString);
};
-struct TPool: public TVector<TInstance> {
+struct TPool: public TVector<TInstance> {
enum EIteratorType {
LearnIterator,
TestIterator,
@@ -33,7 +33,7 @@ struct TPool: public TVector<TInstance> {
const size_t* Current;
TMersenne<ui64> RandomGenerator;
-
+
public:
TCVIterator(const TPool& parentPool,
const size_t foldsCount,
@@ -45,10 +45,10 @@ struct TPool: public TVector<TInstance> {
bool IsValid() const;
- const TInstance& operator*() const;
- const TInstance* operator->() const;
+ const TInstance& operator*() const;
+ const TInstance* operator->() const;
TPool::TCVIterator& operator++();
-
+
private:
void Advance();
bool TakeCurrent() const;
diff --git a/library/cpp/linear_regression/linear_model.h b/library/cpp/linear_regression/linear_model.h
index 551bfe36dc..8bb050cff7 100644
--- a/library/cpp/linear_regression/linear_model.h
+++ b/library/cpp/linear_regression/linear_model.h
@@ -8,35 +8,35 @@
#include <utility>
class TLinearModel {
-private:
- TVector<double> Coefficients;
- double Intercept;
-
-public:
- Y_SAVELOAD_DEFINE(Coefficients, Intercept);
-
- TLinearModel(TVector<double>&& coefficients, const double intercept)
- : Coefficients(std::move(coefficients))
- , Intercept(intercept)
- {
- }
-
- explicit TLinearModel(size_t featuresCount = 0)
- : Coefficients(featuresCount)
- , Intercept(0.)
- {
- }
-
- const TVector<double>& GetCoefficients() const {
- return Coefficients;
- }
-
- double GetIntercept() const {
- return Intercept;
- }
-
- template <typename T>
- double Prediction(const TVector<T>& features) const {
- return InnerProduct(Coefficients, features, Intercept);
- }
+private:
+ TVector<double> Coefficients;
+ double Intercept;
+
+public:
+ Y_SAVELOAD_DEFINE(Coefficients, Intercept);
+
+ TLinearModel(TVector<double>&& coefficients, const double intercept)
+ : Coefficients(std::move(coefficients))
+ , Intercept(intercept)
+ {
+ }
+
+ explicit TLinearModel(size_t featuresCount = 0)
+ : Coefficients(featuresCount)
+ , Intercept(0.)
+ {
+ }
+
+ const TVector<double>& GetCoefficients() const {
+ return Coefficients;
+ }
+
+ double GetIntercept() const {
+ return Intercept;
+ }
+
+ template <typename T>
+ double Prediction(const TVector<T>& features) const {
+ return InnerProduct(Coefficients, features, Intercept);
+ }
};
diff --git a/library/cpp/linear_regression/linear_regression.cpp b/library/cpp/linear_regression/linear_regression.cpp
index 18ae939893..150f9d214e 100644
--- a/library/cpp/linear_regression/linear_regression.cpp
+++ b/library/cpp/linear_regression/linear_regression.cpp
@@ -4,8 +4,8 @@
#include <util/generic/ymath.h>
#ifdef _sse2_
-#include <emmintrin.h>
-#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <xmmintrin.h>
#endif
#include <algorithm>
@@ -68,8 +68,8 @@ bool TLinearRegressionSolver::Add(const TVector<double>& features, const double
LastMeans[featureNumber] = weight * (feature - featureMean);
featureMean += weight * (feature - featureMean) / SumWeights.Get();
- NewMeans[featureNumber] = feature - featureMean;
- ;
+ NewMeans[featureNumber] = feature - featureMean;
+ ;
}
double* olsMatrixElement = LinearizedOLSMatrix.data();
@@ -169,7 +169,7 @@ bool TSLRSolver::Add(const double feature, const double goal, const double weigh
bool TSLRSolver::Add(const double* featuresBegin,
const double* featuresEnd,
- const double* goalsBegin) {
+ const double* goalsBegin) {
for (; featuresBegin != featuresEnd; ++featuresBegin, ++goalsBegin) {
Add(*featuresBegin, *goalsBegin);
}
@@ -178,7 +178,7 @@ bool TSLRSolver::Add(const double* featuresBegin,
bool TSLRSolver::Add(const double* featuresBegin,
const double* featuresEnd,
const double* goalsBegin,
- const double* weightsBegin) {
+ const double* weightsBegin) {
for (; featuresBegin != featuresEnd; ++featuresBegin, ++goalsBegin, ++weightsBegin) {
Add(*featuresBegin, *goalsBegin, *weightsBegin);
}
@@ -198,7 +198,7 @@ namespace {
const double regularizationThreshold,
const double regularizationParameter,
TVector<double>& decompositionTrace,
- TVector<TVector<double>>& decompositionMatrix) {
+ TVector<TVector<double>>& decompositionMatrix) {
const size_t featuresCount = decompositionTrace.size();
size_t olsMatrixElementIdx = 0;
@@ -239,7 +239,7 @@ namespace {
void LDLDecomposition(const TVector<double>& linearizedOLSMatrix,
TVector<double>& decompositionTrace,
- TVector<TVector<double>>& decompositionMatrix) {
+ TVector<TVector<double>>& decompositionMatrix) {
const double regularizationThreshold = 1e-5;
double regularizationParameter = 0.;
@@ -247,14 +247,14 @@ namespace {
regularizationThreshold,
regularizationParameter,
decompositionTrace,
- decompositionMatrix)) {
+ decompositionMatrix)) {
regularizationParameter = regularizationParameter ? 2 * regularizationParameter : 1e-5;
}
}
- TVector<double> SolveLower(const TVector<TVector<double>>& decompositionMatrix,
+ TVector<double> SolveLower(const TVector<TVector<double>>& decompositionMatrix,
const TVector<double>& decompositionTrace,
- const TVector<double>& olsVector) {
+ const TVector<double>& olsVector) {
const size_t featuresCount = olsVector.size();
TVector<double> solution(featuresCount);
@@ -275,8 +275,8 @@ namespace {
return solution;
}
- TVector<double> SolveUpper(const TVector<TVector<double>>& decompositionMatrix,
- const TVector<double>& lowerSolution) {
+ TVector<double> SolveUpper(const TVector<TVector<double>>& decompositionMatrix,
+ const TVector<double>& lowerSolution) {
const size_t featuresCount = lowerSolution.size();
TVector<double> solution(featuresCount);
@@ -297,7 +297,7 @@ namespace {
const size_t featuresCount = olsVector.size();
TVector<double> decompositionTrace(featuresCount);
- TVector<TVector<double>> decompositionMatrix(featuresCount, TVector<double>(featuresCount));
+ TVector<TVector<double>> decompositionMatrix(featuresCount, TVector<double>(featuresCount));
LDLDecomposition(olsMatrix, decompositionTrace, decompositionMatrix);
@@ -307,7 +307,7 @@ namespace {
double SumSquaredErrors(const TVector<double>& olsMatrix,
const TVector<double>& olsVector,
const TVector<double>& solution,
- const double goalsDeviation) {
+ const double goalsDeviation) {
const size_t featuresCount = olsVector.size();
double sumSquaredErrors = goalsDeviation;
@@ -335,7 +335,7 @@ namespace {
for (; leftFeature != featuresEnd; ++leftFeature, ++matrixElement) {
const double weightedFeature = weight * *leftFeature;
const double* rightFeature = leftFeature;
- __m128d wf = {weightedFeature, weightedFeature};
+ __m128d wf = {weightedFeature, weightedFeature};
for (size_t i = 0; i < unaligned; ++i, ++rightFeature, ++matrixElement) {
*matrixElement += weightedFeature * *rightFeature;
}
@@ -397,10 +397,10 @@ namespace {
TFeaturesTransformer TFeaturesTransformerLearner::Solve(const size_t iterationsCount /* = 100 */) {
TTransformationParameters transformationParameters;
- auto updateParameter = [this, &transformationParameters](double TTransformationParameters::*parameter,
+ auto updateParameter = [this, &transformationParameters](double TTransformationParameters::*parameter,
const double left,
- const double right) {
- auto evalParameter = [this, &transformationParameters, parameter](double parameterValue) {
+ const double right) {
+ auto evalParameter = [this, &transformationParameters, parameter](double parameterValue) {
transformationParameters.*parameter = parameterValue;
TFeaturesTransformer transformer(TransformationType, transformationParameters);
diff --git a/library/cpp/linear_regression/linear_regression.h b/library/cpp/linear_regression/linear_regression.h
index 4a2684f506..e57de5ff6c 100644
--- a/library/cpp/linear_regression/linear_regression.h
+++ b/library/cpp/linear_regression/linear_regression.h
@@ -15,7 +15,7 @@ private:
TVector<double> LinearizedOLSMatrix;
TVector<double> OLSVector;
-
+
public:
bool Add(const TVector<double>& features, const double goal, const double weight = 1.);
TLinearModel Solve() const;
@@ -35,7 +35,7 @@ private:
TVector<double> OLSVector;
TKahanAccumulator<double> SumWeights;
-
+
public:
bool Add(const TVector<double>& features, const double goal, const double weight = 1.);
TLinearModel Solve() const;
@@ -54,7 +54,7 @@ private:
TStoreType SumProducts = TStoreType();
TStoreType SumWeights = TStoreType();
-
+
public:
bool Add(const double feature, const double goal, const double weight = 1.) {
SumFeatures += feature * weight;
@@ -72,7 +72,7 @@ public:
template <typename TFloatType>
void Solve(TFloatType& factor, TFloatType& intercept, const double regularizationParameter = 0.1) const {
- if (!(double)SumGoals) {
+ if (!(double)SumGoals) {
factor = intercept = TFloatType();
return;
}
@@ -82,20 +82,20 @@ public:
if (!featuresDeviation) {
factor = TFloatType();
- intercept = (double)SumGoals / (double)SumWeights;
+ intercept = (double)SumGoals / (double)SumWeights;
return;
}
factor = productsDeviation / (featuresDeviation + regularizationParameter);
- intercept = (double)SumGoals / (double)SumWeights - factor * (double)SumFeatures / (double)SumWeights;
+ intercept = (double)SumGoals / (double)SumWeights - factor * (double)SumFeatures / (double)SumWeights;
}
double SumSquaredErrors(const double regularizationParameter = 0.1) const {
- if (!(double)SumWeights) {
+ if (!(double)SumWeights) {
return 0.;
}
- const double sumGoalSquaredDeviations = (double)SumSquaredGoals - (double)SumGoals / (double)SumWeights * (double)SumGoals;
+ const double sumGoalSquaredDeviations = (double)SumSquaredGoals - (double)SumGoals / (double)SumWeights * (double)SumGoals;
double productsDeviation, featuresDeviation;
SetupSolutionFactors(productsDeviation, featuresDeviation);
@@ -108,24 +108,24 @@ public:
const double sumSquaredErrors = factor * factor * featuresDeviation - 2 * factor * productsDeviation + sumGoalSquaredDeviations;
return Max(0., sumSquaredErrors);
}
-
+
private:
void SetupSolutionFactors(double& productsDeviation, double& featuresDeviation) const {
- if (!(double)SumWeights) {
+ if (!(double)SumWeights) {
productsDeviation = featuresDeviation = 0.;
return;
}
- featuresDeviation = (double)SumSquaredFeatures - (double)SumFeatures / (double)SumWeights * (double)SumFeatures;
+ featuresDeviation = (double)SumSquaredFeatures - (double)SumFeatures / (double)SumWeights * (double)SumFeatures;
if (!featuresDeviation) {
return;
}
- productsDeviation = (double)SumProducts - (double)SumFeatures / (double)SumWeights * (double)SumGoals;
+ productsDeviation = (double)SumProducts - (double)SumFeatures / (double)SumWeights * (double)SumGoals;
}
};
using TFastSLRSolver = TTypedFastSLRSolver<double>;
-using TKahanSLRSolver = TTypedFastSLRSolver<TKahanAccumulator<double>>;
+using TKahanSLRSolver = TTypedFastSLRSolver<TKahanAccumulator<double>>;
class TSLRSolver {
private:
@@ -138,7 +138,7 @@ private:
TKahanAccumulator<double> SumWeights;
double Covariation = 0.;
-
+
public:
bool Add(const double feature, const double goal, const double weight = 1.);
@@ -178,7 +178,7 @@ template <typename TSLRSolverType>
class TTypedBestSLRSolver {
private:
TVector<TSLRSolverType> SLRSolvers;
-
+
public:
bool Add(const TVector<double>& features, const double goal, const double weight = 1.) {
if (SLRSolvers.empty()) {
@@ -240,16 +240,16 @@ struct TTransformationParameters {
double FeatureNormalizer = 1.;
Y_SAVELOAD_DEFINE(RegressionFactor,
- RegressionIntercept,
- FeatureOffset,
- FeatureNormalizer);
+ RegressionIntercept,
+ FeatureOffset,
+ FeatureNormalizer);
};
class TFeaturesTransformer {
private:
ETransformationType TransformationType;
TTransformationParameters TransformationParameters;
-
+
public:
Y_SAVELOAD_DEFINE(TransformationType, TransformationParameters);
@@ -264,10 +264,10 @@ public:
double Transformation(const double value) const {
switch (TransformationType) {
- case ETransformationType::TT_IDENTITY: {
+ case ETransformationType::TT_IDENTITY: {
return value;
}
- case ETransformationType::TT_SIGMA: {
+ case ETransformationType::TT_SIGMA: {
const double valueWithoutOffset = value - TransformationParameters.FeatureOffset;
const double transformedValue = valueWithoutOffset / (fabs(valueWithoutOffset) + TransformationParameters.FeatureNormalizer);
return TransformationParameters.RegressionIntercept + TransformationParameters.RegressionFactor * transformedValue;
@@ -290,7 +290,7 @@ private:
ETransformationType TransformationType;
TVector<TPoint> Points;
-
+
public:
TFeaturesTransformerLearner(const ETransformationType transformationType)
: TransformationType(transformationType)
@@ -317,7 +317,7 @@ private:
THashMap<double, TBucket> Buckets;
double Step;
-
+
public:
TFastFeaturesTransformerLearner(const ETransformationType transformationType, const double step = 0.1)
: TransformationType(transformationType)
diff --git a/library/cpp/linear_regression/linear_regression_ut.cpp b/library/cpp/linear_regression/linear_regression_ut.cpp
index d6e179bc0c..e71a16b67a 100644
--- a/library/cpp/linear_regression/linear_regression_ut.cpp
+++ b/library/cpp/linear_regression/linear_regression_ut.cpp
@@ -3,7 +3,7 @@
#include <util/generic/vector.h>
#include <util/generic/ymath.h>
-#include <util/random/random.h>
+#include <util/random/random.h>
#include <util/system/defaults.h>
@@ -200,14 +200,14 @@ Y_UNIT_TEST_SUITE(TLinearRegressionTest) {
}
const double intercept = 10;
- TVector<TVector<double>> featuresMatrix;
+ TVector<TVector<double>> featuresMatrix;
TVector<double> goals;
TVector<double> weights;
for (size_t instanceNumber = 0; instanceNumber < instancesCount; ++instanceNumber) {
TVector<double> features;
for (size_t featureNumber = 0; featureNumber < featuresCount; ++featureNumber) {
- features.push_back(RandomNumber<double>());
+ features.push_back(RandomNumber<double>());
}
featuresMatrix.push_back(features);
diff --git a/library/cpp/linear_regression/welford.h b/library/cpp/linear_regression/welford.h
index 89b5ce39ea..ee865d6693 100644
--- a/library/cpp/linear_regression/welford.h
+++ b/library/cpp/linear_regression/welford.h
@@ -9,7 +9,7 @@ class TMeanCalculator {
private:
double Mean = 0.;
TKahanAccumulator<double> SumWeights;
-
+
public:
Y_SAVELOAD_DEFINE(Mean, SumWeights);
@@ -20,11 +20,11 @@ public:
double GetSumWeights() const;
void Reset();
- bool operator<(const TMeanCalculator& other) const {
+ bool operator<(const TMeanCalculator& other) const {
return Mean < other.Mean;
}
- bool operator>(const TMeanCalculator& other) const {
+ bool operator>(const TMeanCalculator& other) const {
return Mean > other.Mean;
}
};
@@ -38,7 +38,7 @@ private:
double SecondValueMean = 0.;
TKahanAccumulator<double> SumWeights;
-
+
public:
Y_SAVELOAD_DEFINE(Covariation, FirstValueMean, SecondValueMean, SumWeights);
@@ -60,7 +60,7 @@ class TDeviationCalculator {
private:
double Deviation = 0.;
TMeanCalculator MeanCalculator;
-
+
public:
Y_SAVELOAD_DEFINE(Deviation, MeanCalculator);