aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/linear_regression/benchmark/main.cpp
blob: 735d41e9888c09e4fee14f38c8be467542c5439c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "pool.h"

#include <library/cpp/linear_regression/linear_regression.h>

#include <util/datetime/base.h>
#include <util/datetime/cputimer.h>

#include <util/system/type_name.h>

#include <util/string/printf.h>

template <typename TLRSolver>
void QualityBenchmark(const TPool& originalPool) {
    auto measure = [&](const double injureFactor, const double injureOffset) {
        TPool injuredPool = originalPool.InjurePool(injureFactor, injureOffset);

        static const size_t runsCount = 10;
        static const size_t foldsCount = 10;

        TMeanCalculator determinationCoefficientCalculator;

        TPool::TCVIterator learnIterator = injuredPool.CrossValidationIterator(foldsCount, TPool::LearnIterator);
        TPool::TCVIterator testIterator = injuredPool.CrossValidationIterator(foldsCount, TPool::TestIterator);

        for (size_t runNumber = 0; runNumber < runsCount; ++runNumber) {
            for (size_t foldNumber = 0; foldNumber < foldsCount; ++foldNumber) {
                learnIterator.ResetShuffle();
                learnIterator.SetTestFold(foldNumber);
                testIterator.ResetShuffle();
                testIterator.SetTestFold(foldNumber);

                TLRSolver solver;
                for (; learnIterator.IsValid(); ++learnIterator) {
                    solver.Add(learnIterator->Features, learnIterator->Goal, learnIterator->Weight);
                }
                TLinearModel model = solver.Solve();

                TDeviationCalculator goalsCalculator;
                TKahanAccumulator<double> errorsCalculator;
                for (; testIterator.IsValid(); ++testIterator) {
                    const double prediction = model.Prediction(testIterator->Features);
                    const double goal = testIterator->Goal;
                    const double weight = testIterator->Weight;
                    const double error = goal - prediction;

                    goalsCalculator.Add(goal, weight);
                    errorsCalculator += error * error * weight;
                }

                const double determinationCoefficient = 1 - errorsCalculator.Get() / goalsCalculator.GetDeviation();
                determinationCoefficientCalculator.Add(determinationCoefficient);
            }
        }

        return determinationCoefficientCalculator.GetMean();
    };

    Cout << TypeName<TLRSolver>() << ":\n";
    Cout << "\t" << Sprintf("base    : %.10lf\n", measure(1., 0.));
    Cout << "\t" << Sprintf("injure1 : %.10lf\n", measure(1e-1, 1e+1));
    Cout << "\t" << Sprintf("injure2 : %.10lf\n", measure(1e-3, 1e+4));
    Cout << "\t" << Sprintf("injure3 : %.10lf\n", measure(1e-3, 1e+5));
    Cout << "\t" << Sprintf("injure4 : %.10lf\n", measure(1e-3, 1e+6));
    Cout << "\t" << Sprintf("injure5 : %.10lf\n", measure(1e-4, 1e+6));
    Cout << "\t" << Sprintf("injure6 : %.10lf\n", measure(1e-4, 1e+7));
    Cout << Endl;
}

template <typename TLRSolver>
void SpeedBenchmark(const TPool& originalPool) {
    TDeviationCalculator speedTest;

    static const size_t runsCount = 1000;
    for (size_t runNumber = 0; runNumber < runsCount; ++runNumber) {
        TLRSolver solver;
        TLinearModel model;
        {
            TSimpleTimer timer;
            for (const TInstance& instance : originalPool) {
                solver.Add(instance.Features, instance.Goal, instance.Weight);
            }
            model = solver.Solve();

            speedTest.Add(timer.Get().MicroSeconds());
        }
    }

    const double multiplier = 1e-6;
    Cout << Sprintf("%.5lf +/- %.5lf: ", speedTest.GetMean() * multiplier, speedTest.GetStdDev() * multiplier) << TypeName<TLRSolver>() << Endl;
}

// Entry point. Every command-line argument is treated as a path handed to TPool::ReadFromFeatures;
// for each of them the quality benchmarks and then the speed benchmarks are run for all solvers.
// With no arguments nothing is printed. Always returns 0.
int main(int argc, const char** argv) {
    // argv[0] is skipped; the remaining arguments are processed in order.
    for (int argIndex = 1; argIndex < argc; ++argIndex) {
        const char* const featuresPath = argv[argIndex];

        TPool pool;
        pool.ReadFromFeatures(featuresPath);

        Cout << featuresPath << ":" << Endl;

        // Quality: simple (single-feature) solvers first, then the full regression solvers.
        QualityBenchmark<TFastBestSLRSolver>(pool);
        QualityBenchmark<TKahanBestSLRSolver>(pool);
        QualityBenchmark<TBestSLRSolver>(pool);

        QualityBenchmark<TLinearRegressionSolver>(pool);
        QualityBenchmark<TFastLinearRegressionSolver>(pool);

        // Speed: same solvers, same order.
        SpeedBenchmark<TFastBestSLRSolver>(pool);
        SpeedBenchmark<TKahanBestSLRSolver>(pool);
        SpeedBenchmark<TBestSLRSolver>(pool);

        SpeedBenchmark<TLinearRegressionSolver>(pool);
        SpeedBenchmark<TFastLinearRegressionSolver>(pool);
    }

    return 0;
}