aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/linear_regression/benchmark/main.cpp
blob: f68b365d4ebbb02745ae0e3934ee1b7b6e0e4836 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "pool.h" 
 
#include <library/cpp/linear_regression/linear_regression.h>
 
#include <util/datetime/base.h> 
#include <util/datetime/cputimer.h> 
 
#include <util/system/type_name.h>
 
#include <util/string/printf.h> 
 
template <typename TLRSolver> 
void QualityBenchmark(const TPool& originalPool) { 
    auto measure = [&](const double injureFactor, const double injureOffset) { 
        TPool injuredPool = originalPool.InjurePool(injureFactor, injureOffset); 
 
        static const size_t runsCount = 10; 
        static const size_t foldsCount = 10; 
 
        TMeanCalculator determinationCoefficientCalculator; 
 
        TPool::TCVIterator learnIterator = injuredPool.CrossValidationIterator(foldsCount, TPool::LearnIterator); 
        TPool::TCVIterator testIterator = injuredPool.CrossValidationIterator(foldsCount, TPool::TestIterator); 
 
        for (size_t runNumber = 0; runNumber < runsCount; ++runNumber) { 
            for (size_t foldNumber = 0; foldNumber < foldsCount; ++foldNumber) { 
                learnIterator.ResetShuffle(); 
                learnIterator.SetTestFold(foldNumber); 
                testIterator.ResetShuffle(); 
                testIterator.SetTestFold(foldNumber); 
 
                TLRSolver solver; 
                for (; learnIterator.IsValid(); ++learnIterator) { 
                    solver.Add(learnIterator->Features, learnIterator->Goal, learnIterator->Weight); 
                } 
                TLinearModel model = solver.Solve(); 
 
                TDeviationCalculator goalsCalculator; 
                TKahanAccumulator<double> errorsCalculator; 
                for (; testIterator.IsValid(); ++testIterator) { 
                    const double prediction = model.Prediction(testIterator->Features); 
                    const double goal = testIterator->Goal; 
                    const double weight = testIterator->Weight; 
                    const double error = goal - prediction; 
 
                    goalsCalculator.Add(goal, weight); 
                    errorsCalculator += error * error * weight; 
                } 
 
                const double determinationCoefficient = 1 - errorsCalculator.Get() / goalsCalculator.GetDeviation(); 
                determinationCoefficientCalculator.Add(determinationCoefficient); 
            } 
        } 
 
        return determinationCoefficientCalculator.GetMean(); 
    }; 
 
    Cout << TypeName<TLRSolver>() << ":\n"; 
    Cout << "\t" << Sprintf("base    : %.10lf\n", measure(1., 0.)); 
    Cout << "\t" << Sprintf("injure1 : %.10lf\n", measure(1e-1, 1e+1)); 
    Cout << "\t" << Sprintf("injure2 : %.10lf\n", measure(1e-3, 1e+4)); 
    Cout << "\t" << Sprintf("injure3 : %.10lf\n", measure(1e-3, 1e+5)); 
    Cout << "\t" << Sprintf("injure4 : %.10lf\n", measure(1e-3, 1e+6)); 
    Cout << "\t" << Sprintf("injure5 : %.10lf\n", measure(1e-4, 1e+6)); 
    Cout << "\t" << Sprintf("injure6 : %.10lf\n", measure(1e-4, 1e+7)); 
    Cout << Endl; 
} 
 
template <typename TLRSolver> 
void SpeedBenchmark(const TPool& originalPool) { 
    TDeviationCalculator speedTest; 
 
    static const size_t runsCount = 1000; 
    for (size_t runNumber = 0; runNumber < runsCount; ++runNumber) { 
        TLRSolver solver; 
        TLinearModel model; 
        { 
            TSimpleTimer timer; 
            for (const TInstance& instance : originalPool) { 
                solver.Add(instance.Features, instance.Goal, instance.Weight); 
            } 
            model = solver.Solve(); 
 
            speedTest.Add(timer.Get().MicroSeconds()); 
        } 
    } 
 
    const double multiplier = 1e-6; 
    Cout << Sprintf("%.5lf +/- %.5lf: ", speedTest.GetMean() * multiplier, speedTest.GetStdDev() * multiplier) << TypeName<TLRSolver>() << Endl; 
} 
 
/// Entry point: treats every command-line argument as a path passed to TPool::ReadFromFeatures
/// and, for each of them, runs the quality benchmarks followed by the speed benchmarks
/// for all solver types listed below.
///
/// @return always 0.
int main(int argc, const char** argv) {
    // argv[0] is skipped; every remaining argument is handled independently.
    for (int argIndex = 1; argIndex < argc; ++argIndex) {
        const char* const featuresPath = argv[argIndex];

        TPool pool;
        pool.ReadFromFeatures(featuresPath);

        Cout << featuresPath << ":" << Endl;

        // Quality: simple linear regression solvers first, then the general ones.
        QualityBenchmark<TFastBestSLRSolver>(pool);
        QualityBenchmark<TKahanBestSLRSolver>(pool);
        QualityBenchmark<TBestSLRSolver>(pool);

        QualityBenchmark<TLinearRegressionSolver>(pool);
        QualityBenchmark<TFastLinearRegressionSolver>(pool);

        // Speed: same solvers, same order.
        SpeedBenchmark<TFastBestSLRSolver>(pool);
        SpeedBenchmark<TKahanBestSLRSolver>(pool);
        SpeedBenchmark<TBestSLRSolver>(pool);

        SpeedBenchmark<TLinearRegressionSolver>(pool);
        SpeedBenchmark<TFastLinearRegressionSolver>(pool);
    }

    return 0;
}