diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/linear_regression/benchmark/main.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/linear_regression/benchmark/main.cpp')
-rw-r--r-- | library/cpp/linear_regression/benchmark/main.cpp | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/library/cpp/linear_regression/benchmark/main.cpp b/library/cpp/linear_regression/benchmark/main.cpp new file mode 100644 index 0000000000..735d41e988 --- /dev/null +++ b/library/cpp/linear_regression/benchmark/main.cpp @@ -0,0 +1,114 @@ +#include "pool.h" + +#include <library/cpp/linear_regression/linear_regression.h> + +#include <util/datetime/base.h> +#include <util/datetime/cputimer.h> + +#include <util/system/type_name.h> + +#include <util/string/printf.h> + +template <typename TLRSolver> +void QualityBenchmark(const TPool& originalPool) { + auto measure = [&](const double injureFactor, const double injureOffset) { + TPool injuredPool = originalPool.InjurePool(injureFactor, injureOffset); + + static const size_t runsCount = 10; + static const size_t foldsCount = 10; + + TMeanCalculator determinationCoefficientCalculator; + + TPool::TCVIterator learnIterator = injuredPool.CrossValidationIterator(foldsCount, TPool::LearnIterator); + TPool::TCVIterator testIterator = injuredPool.CrossValidationIterator(foldsCount, TPool::TestIterator); + + for (size_t runNumber = 0; runNumber < runsCount; ++runNumber) { + for (size_t foldNumber = 0; foldNumber < foldsCount; ++foldNumber) { + learnIterator.ResetShuffle(); + learnIterator.SetTestFold(foldNumber); + testIterator.ResetShuffle(); + testIterator.SetTestFold(foldNumber); + + TLRSolver solver; + for (; learnIterator.IsValid(); ++learnIterator) { + solver.Add(learnIterator->Features, learnIterator->Goal, learnIterator->Weight); + } + TLinearModel model = solver.Solve(); + + TDeviationCalculator goalsCalculator; + TKahanAccumulator<double> errorsCalculator; + for (; testIterator.IsValid(); ++testIterator) { + const double prediction = model.Prediction(testIterator->Features); + const double goal = testIterator->Goal; + const double weight = testIterator->Weight; + const double error = goal - prediction; + + goalsCalculator.Add(goal, weight); + errorsCalculator += error * error * weight; + } + + const double determinationCoefficient = 1 - errorsCalculator.Get() / goalsCalculator.GetDeviation(); + determinationCoefficientCalculator.Add(determinationCoefficient); + } + } + + return determinationCoefficientCalculator.GetMean(); + }; + + Cout << TypeName<TLRSolver>() << ":\n"; + Cout << "\t" << Sprintf("base : %.10lf\n", measure(1., 0.)); + Cout << "\t" << Sprintf("injure1 : %.10lf\n", measure(1e-1, 1e+1)); + Cout << "\t" << Sprintf("injure2 : %.10lf\n", measure(1e-3, 1e+4)); + Cout << "\t" << Sprintf("injure3 : %.10lf\n", measure(1e-3, 1e+5)); + Cout << "\t" << Sprintf("injure4 : %.10lf\n", measure(1e-3, 1e+6)); + Cout << "\t" << Sprintf("injure5 : %.10lf\n", measure(1e-4, 1e+6)); + Cout << "\t" << Sprintf("injure6 : %.10lf\n", measure(1e-4, 1e+7)); + Cout << Endl; +} + +template <typename TLRSolver> +void SpeedBenchmark(const TPool& originalPool) { + TDeviationCalculator speedTest; + + static const size_t runsCount = 1000; + for (size_t runNumber = 0; runNumber < runsCount; ++runNumber) { + TLRSolver solver; + TLinearModel model; + { + TSimpleTimer timer; + for (const TInstance& instance : originalPool) { + solver.Add(instance.Features, instance.Goal, instance.Weight); + } + model = solver.Solve(); + + speedTest.Add(timer.Get().MicroSeconds()); + } + } + + const double multiplier = 1e-6; + Cout << Sprintf("%.5lf +/- %.5lf: ", speedTest.GetMean() * multiplier, speedTest.GetStdDev() * multiplier) << TypeName<TLRSolver>() << Endl; +} + +int main(int argc, const char** argv) { + for (int taskNumber = 1; taskNumber < argc; ++taskNumber) { + TPool pool; + pool.ReadFromFeatures(argv[taskNumber]); + + Cout << argv[taskNumber] << ":" << Endl; + QualityBenchmark<TFastBestSLRSolver>(pool); + QualityBenchmark<TKahanBestSLRSolver>(pool); + QualityBenchmark<TBestSLRSolver>(pool); + + QualityBenchmark<TLinearRegressionSolver>(pool); + QualityBenchmark<TFastLinearRegressionSolver>(pool); + + SpeedBenchmark<TFastBestSLRSolver>(pool); + SpeedBenchmark<TKahanBestSLRSolver>(pool); + SpeedBenchmark<TBestSLRSolver>(pool); + + SpeedBenchmark<TLinearRegressionSolver>(pool); + SpeedBenchmark<TFastLinearRegressionSolver>(pool); + } + + return 0; +} |