aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/testing/benchmark
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/testing/benchmark
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/testing/benchmark')
-rw-r--r--library/cpp/testing/benchmark/README.md11
-rw-r--r--library/cpp/testing/benchmark/bench.cpp604
-rw-r--r--library/cpp/testing/benchmark/bench.h92
-rw-r--r--library/cpp/testing/benchmark/dummy.cpp8
-rw-r--r--library/cpp/testing/benchmark/examples/main.cpp215
-rw-r--r--library/cpp/testing/benchmark/examples/metrics/main.py7
-rw-r--r--library/cpp/testing/benchmark/examples/metrics/ya.make20
-rw-r--r--library/cpp/testing/benchmark/examples/ya.make12
-rw-r--r--library/cpp/testing/benchmark/main/main.cpp16
-rw-r--r--library/cpp/testing/benchmark/main/ya.make16
-rw-r--r--library/cpp/testing/benchmark/ya.make22
11 files changed, 1023 insertions, 0 deletions
diff --git a/library/cpp/testing/benchmark/README.md b/library/cpp/testing/benchmark/README.md
new file mode 100644
index 00000000000..ad440e07a2f
--- /dev/null
+++ b/library/cpp/testing/benchmark/README.md
@@ -0,0 +1,11 @@
+----------- YourBenchmarkName ---------------
+ samples: 403
+ iterations: 100576
+ iterations hr: 101K
+ run time: 5.016924443
+ per iteration: 119265.0829 (119K) cycles
+
+samples – сколько раз была вызвана функция с бенчмарком
+iterations – сколько всего итераций было сделано. при каждом вызове функции бенчмарка может быть разное кол-во итераций
+run time – сколько времени исполнялся бенчмарк
+per iteration – сколько времени (процессорных тактов) ушло на одну итерацию, в идеале это то сколько работает функция для которой ты написал бенчмарк \ No newline at end of file
diff --git a/library/cpp/testing/benchmark/bench.cpp b/library/cpp/testing/benchmark/bench.cpp
new file mode 100644
index 00000000000..08d8708005a
--- /dev/null
+++ b/library/cpp/testing/benchmark/bench.cpp
@@ -0,0 +1,604 @@
+#include "bench.h"
+
+#include <contrib/libs/re2/re2/re2.h>
+
+#include <library/cpp/colorizer/output.h>
+#include <library/cpp/getopt/small/last_getopt.h>
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/linear_regression/linear_regression.h>
+#include <library/cpp/threading/poor_man_openmp/thread_helper.h>
+
+#include <util/system/hp_timer.h>
+#include <util/system/info.h>
+#include <util/stream/output.h>
+#include <util/datetime/base.h>
+#include <util/random/random.h>
+#include <util/string/cast.h>
+#include <util/generic/xrange.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/singleton.h>
+#include <util/system/spinlock.h>
+#include <util/generic/function.h>
+#include <util/generic/maybe.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/intrlist.h>
+#include <util/stream/format.h>
+#include <util/system/yield.h>
+
+using re2::RE2;
+
+using namespace NBench;
+using namespace NColorizer;
+using namespace NLastGetopt;
+
+namespace {
+    // Per-benchmark run options.
+    struct TOptions {
+        double TimeBudget; // wall-clock seconds allotted to a single benchmark
+    };
+
+    // Aggregated outcome of one benchmark run, consumed by the reporters.
+    struct TResult {
+        TStringBuf TestName;
+        ui64 Samples;     // number of samples kept after outlier filtering
+        ui64 Iterations;  // total iterations executed across all samples
+        TMaybe<double> CyclesPerIteration;  // set by the CPU-cycle timer path
+        TMaybe<double> SecondsPerIteration; // unset by the visible CPU path (see RunTest)
+        double RunTime;   // total wall-clock seconds spent in the run
+        size_t TestId; // Sequential test id (zero-based)
+    };
+
+    // Base class for a registered benchmark; instances link themselves into
+    // the global intrusive test list via Register().
+    struct ITestRunner: public TIntrusiveListItem<ITestRunner> {
+        virtual ~ITestRunner() = default;
+        void Register(); // appends this runner to the global Tests() list
+
+        virtual TStringBuf Name() const noexcept = 0;
+        virtual TResult Run(const TOptions& opts) = 0;
+        size_t SequentialId = 0; // zero-based position, assigned by EnumerateTests()
+    };
+
+    // Benchmark created by Y_CPU_BENCHMARK: wraps the user function and
+    // self-registers in the global test list at construction time.
+    struct TCpuBenchmark: public ITestRunner {
+        inline TCpuBenchmark(const char* name, NCpu::TUserFunc func)
+            : F(func)
+            , N(name)
+        {
+            Register();
+        }
+
+        TResult Run(const TOptions& opts) override;
+
+        TStringBuf Name() const noexcept override {
+            return N;
+        }
+
+        std::function<NCpu::TUserFunc> F; // user-provided benchmark body
+        const TStringBuf N;               // non-owning benchmark name
+    };
+
+    // Renders a duration given in seconds as a human-readable string,
+    // choosing the largest unit (seconds/milli/micro/nano) whose value
+    // exceeds 0.1; sub-0.05ns values are clamped to 0 nanoseconds.
+    inline TString DoFmtTime(double t) {
+        if (t > 0.1) {
+            return ToString(t) + " seconds";
+        }
+
+        t *= 1000.0;
+
+        if (t > 0.1) {
+            return ToString(t) + " milliseconds";
+        }
+
+        t *= 1000.0;
+
+        if (t > 0.1) {
+            return ToString(t) + " microseconds";
+        }
+
+        t *= 1000.0;
+
+        if (t < 0.05) {
+            t = 0.0; // below measurement noise — report as zero
+        }
+
+        return ToString(t) + " nanoseconds";
+    }
+
+    // High-resolution wall-clock timer; Passed() is inherited from THPTimer
+    // (presumably returns seconds — consistent with DoFmtTime usage).
+    struct THiPerfTimer: public THPTimer {
+        static inline TString FmtTime(double t) {
+            return DoFmtTime(t);
+        }
+    };
+
+    // Coarse wall-clock timer based on TInstant; Passed() returns seconds.
+    struct TSimpleTimer {
+        inline double Passed() const noexcept {
+            return (TInstant::Now() - N).MicroSeconds() / 1000000.0;
+        }
+
+        static inline TString FmtTime(double t) {
+            return DoFmtTime(t);
+        }
+
+        const TInstant N = TInstant::Now(); // construction timestamp
+    };
+
+    // CPU-cycle timer based on GetCycleCount(); Passed() returns elapsed cycles.
+    struct TCycleTimer {
+        inline ui64 Passed() const noexcept {
+            return GetCycleCount() - N;
+        }
+
+        static inline TString FmtTime(double t) {
+            if (t < 0.5) {
+                t = 0.0; // less than half a cycle — treat as zero
+            }
+
+            // Append a human-readable quantity (e.g. "(119K)") for big counts.
+            TString hr;
+            if (t > 10 * 1000) {
+                hr = " (" + ToString(HumanReadableSize(t, ESizeFormat::SF_QUANTITY)) + ")";
+            }
+
+            return ToString(t) + hr + " cycles";
+        }
+
+        const ui64 N = GetCycleCount(); // construction-time cycle counter
+    };
+
+    // Runs callable t with iteration count n and returns the elapsed time as
+    // reported by TMyTimer (seconds for wall-clock timers, cycles for
+    // TCycleTimer).
+    template <class TMyTimer, class T>
+    inline double Measure(T&& t, size_t n) {
+        TMyTimer timer;
+
+        t(n);
+
+        return timer.Passed();
+    }
+
// Generates the iteration count for the next benchmark sample.
//
// NOTE(review): the original body contained statements after the first
// `return` (geometric growth `N *= 1.02` followed by `return Max<double>(N, M)`)
// that were unreachable — an apparent leftover experiment. They and the
// then-unused member `N` are removed here; observable behavior is unchanged:
// Next() yields the linear sequence 1, 2, 3, ...
struct TSampleIterator {
    inline size_t Next() noexcept {
        return M++;
    }

    size_t M = 1; // next iteration count to hand out
};
+
+ using TSample = std::pair<size_t, double>;
+ using TSamples = TVector<TSample>;
+
// Linear model y = A * x + B obtained from regression over samples.
struct TLinFunc {
    double A; // slope: estimated cost per iteration
    double B; // intercept: fixed per-sample overhead

    // Evaluates the fitted line at point x.
    inline double operator()(double x) const noexcept {
        return B + x * A;
    }
};
+
+    // Fits y ~ A*x + B over samples (x = iteration count, y = measured time)
+    // using a Kahan-compensated simple-linear-regression solver.
+    TLinFunc CalcModel(const TSamples& s) {
+        TKahanSLRSolver solver;
+
+        for (const auto& p : s) {
+            solver.Add(p.first, p.second);
+        }
+
+        double c = 0;
+        double i = 0;
+
+        // NOTE(review): assumes Solve(slope, intercept) output order — confirm
+        // against the linear_regression library.
+        solver.Solve(c, i);
+
+        return TLinFunc{c, i};
+    }
+
+    // Drops the worst-fitting (1 - fraction) share of samples: fits a
+    // preliminary linear model, ranks samples by absolute prediction error
+    // and keeps the best `fraction` portion. Inputs with fewer than 20
+    // samples are returned unchanged (too few points to judge outliers).
+    inline TSamples RemoveOutliers(const TSamples& s, double fraction) {
+        if (s.size() < 20) {
+            return s;
+        }
+
+        const auto predictor = CalcModel(s);
+
+        // Error metric: absolute deviation from the fitted line. The
+        // commented-out variants are earlier relative-error experiments.
+        const auto errfunc = [&predictor](const TSample& p) -> double {
+            //return (1.0 + fabs(predictor(p.first) - p.second)) / (1.0 + fabs(p.second));
+            //return fabs((predictor(p.first) - p.second)) / (1.0 + fabs(p.second));
+            //return fabs((predictor(p.first) - p.second)) / (1.0 + p.first);
+            return fabs((predictor(p.first) - p.second));
+        };
+
+        using TSampleWithError = std::pair<const TSample*, double>;
+        TVector<TSampleWithError> v;
+
+        v.reserve(s.size());
+
+        for (const auto& p : s) {
+            v.emplace_back(&p, errfunc(p));
+        }
+
+        // Sort by error; ties broken by address for a deterministic order.
+        Sort(v.begin(), v.end(), [](const TSampleWithError& l, const TSampleWithError& r) -> bool {
+            return (l.second < r.second) || ((l.second == r.second) && (l.first < r.first));
+        });
+
+        // Disabled debug dump of the ranked samples.
+        if (0) {
+            for (const auto& x : v) {
+                Cout << x.first->first << ", " << x.first->second << " -> " << x.second << Endl;
+            }
+        }
+
+        TSamples ret;
+
+        ret.reserve(v.size());
+
+        for (const auto i : xrange<size_t>(0, fraction * v.size())) {
+            ret.push_back(*v[i].first);
+        }
+
+        return ret;
+    }
+
+    // Core measurement loop shared by all timer types: warm up once, then
+    // repeatedly measure func(n) with growing n until the time budget is
+    // exhausted, filter outliers and fit time ~ A*n + B; the slope A becomes
+    // the reported cost per iteration.
+    template <class TMyTimer, class T>
+    static inline TResult RunTest(T&& func, double budget, ITestRunner& test) {
+        THPTimer start;
+
+        start.Passed();
+
+        TSampleIterator sample;
+        TSamples samples;
+        ui64 iters = 0;
+
+        //warm up
+        func(1);
+
+        while (start.Passed() < budget) {
+            // Pacing heuristic: yield the CPU between samples early in the
+            // run (the threshold grows with the number of collected samples).
+            if (start.Passed() < ((budget * samples.size()) / 2000000.0)) {
+                ThreadYield();
+            } else {
+                const size_t n = sample.Next();
+
+                iters += (ui64)n;
+                samples.emplace_back(n, Measure<TMyTimer>(func, n));
+            }
+        }
+
+        // Keep the best-fitting 90% of samples; report the regression slope
+        // as cycles-per-iteration (seconds slot is left empty on this path).
+        auto filtered = RemoveOutliers(samples, 0.9);
+
+        return {test.Name(), filtered.size(), iters, CalcModel(filtered).A, Nothing(), start.Passed(), test.SequentialId};
+    }
+
+    using TTests = TIntrusiveListWithAutoDelete<ITestRunner, TDestructor>;
+
+    // Global auto-deleting registry of all benchmarks, populated during
+    // static initialization through ITestRunner::Register().
+    inline TTests& Tests() {
+        return *Singleton<TTests>();
+    }
+
+    void ITestRunner::Register() {
+        Tests().PushBack(this);
+    }
+
+    // Measures the user function under the CPU-cycle timer.
+    TResult TCpuBenchmark::Run(const TOptions& opts) {
+        return RunTest<TCycleTimer>([this](size_t n) {
+            NCpu::TParams params{n};
+
+            F(params);
+        }, opts.TimeBudget, *this);
+    }
+
+    // Output format selected by --format; the quoted names in the trailing
+    // comments match the strings accepted by FromStringImpl<EOutFormat>.
+    enum EOutFormat {
+        F_CONSOLE = 0 /* "console" */,
+        F_CSV /* "csv" */,
+        F_JSON /* "json" */
+    };
+
+    // Serializes reporters' writes to stdout when running multithreaded.
+    TAdaptiveLock STDOUT_LOCK;
+
+    // Sink for benchmark results; Finish() is invoked once after all tests.
+    struct IReporter {
+        virtual void Report(TResult&& result) = 0;
+
+        virtual void Finish() {
+        }
+
+        virtual ~IReporter() {
+        }
+    };
+
+    // Prints each result in the human-readable format of Out<TResult>.
+    class TConsoleReporter: public IReporter {
+    public:
+        ~TConsoleReporter() override {
+        }
+
+        void Report(TResult&& r) override {
+            with_lock (STDOUT_LOCK) {
+                Cout << r;
+            }
+        }
+    };
+
+ class TCSVReporter: public IReporter {
+ public:
+ TCSVReporter() {
+ Cout << "Name\tSamples\tIterations\tRun_time\tPer_iteration_sec\tPer_iteration_cycles" << Endl;
+ }
+
+ ~TCSVReporter() override {
+ }
+
+ void Report(TResult&& r) override {
+ with_lock (STDOUT_LOCK) {
+ Cout << r.TestName
+ << '\t' << r.Samples
+ << '\t' << r.Iterations
+ << '\t' << r.RunTime;
+
+ Cout << '\t';
+ if (r.CyclesPerIteration) {
+ Cout << TCycleTimer::FmtTime(*r.CyclesPerIteration);
+ } else {
+ Cout << '-';
+ }
+
+ Cout << '\t';
+ if (r.SecondsPerIteration) {
+ Cout << DoFmtTime(*r.SecondsPerIteration);
+ } else {
+ Cout << '-';
+ }
+
+ Cout << Endl;
+ }
+ }
+ };
+
+    // Buffers all results and emits a single JSON document from Finish().
+    class TJSONReporter: public IReporter {
+    public:
+        ~TJSONReporter() override {
+        }
+
+        void Report(TResult&& r) override {
+            with_lock (ResultsLock_) {
+                Results_.emplace_back(std::move(r));
+            }
+        }
+
+        void Finish() override {
+            NJson::TJsonValue report;
+            auto& bench = report["benchmark"];
+            bench.SetType(NJson::JSON_ARRAY);
+
+            NJson::TJsonValue benchReport;
+
+            // NOTE(review): unlike the console/CSV reporters, the total
+            // "iterations" count is not emitted here.
+            for (const auto& result : Results_) {
+                NJson::TJsonValue{}.Swap(benchReport); // reset between results
+                benchReport["name"] = result.TestName;
+                benchReport["samples"] = result.Samples;
+                benchReport["run_time"] = result.RunTime;
+
+                if (result.CyclesPerIteration) {
+                    benchReport["per_iteration_cycles"] = *result.CyclesPerIteration;
+                }
+
+                if (result.SecondsPerIteration) {
+                    // NOTE(review): "per_iteration_secons" is misspelled
+                    // ("seconds"), but renaming the key would break any
+                    // existing consumers — kept as-is.
+                    benchReport["per_iteration_secons"] = *result.SecondsPerIteration;
+                }
+
+                bench.AppendValue(benchReport);
+            }
+
+            Cout << report << Endl;
+        }
+
+    private:
+        TAdaptiveLock ResultsLock_;
+        TVector<TResult> Results_;
+    };
+
+    // Decorator that buffers results arriving out of order (parallel runs)
+    // and forwards them to the slave reporter in SequentialId order.
+    class TOrderedReporter: public IReporter {
+    public:
+        TOrderedReporter(THolder<IReporter> slave)
+            : Slave_(std::move(slave))
+        {
+        }
+
+        void Report(TResult&& result) override {
+            with_lock (ResultsLock_) {
+                OrderedResultQueue_.emplace(result.TestId, std::move(result));
+                // Flush the longest contiguous ready prefix of the queue.
+                while (!OrderedResultQueue_.empty() && OrderedResultQueue_.begin()->first <= ExpectedTestId_) {
+                    Slave_->Report(std::move(OrderedResultQueue_.begin()->second));
+                    OrderedResultQueue_.erase(OrderedResultQueue_.begin());
+                    ++ExpectedTestId_;
+                }
+            }
+        }
+
+        void Finish() override {
+            // Flush whatever is still buffered (TMap iterates in id order).
+            for (auto& it : OrderedResultQueue_) {
+                Slave_->Report(std::move(it.second));
+            }
+            OrderedResultQueue_.clear();
+            Slave_->Finish();
+        }
+
+    private:
+        THolder<IReporter> Slave_;
+        size_t ExpectedTestId_ = 0; // next id to forward to the slave
+        TMap<size_t, TResult> OrderedResultQueue_;
+        TAdaptiveLock ResultsLock_;
+    };
+
+    // Creates the reporter for the chosen output format; console is the
+    // fallback for unexpected enum values.
+    THolder<IReporter> MakeReporter(const EOutFormat type) {
+        switch (type) {
+            case F_CONSOLE:
+                return MakeHolder<TConsoleReporter>();
+
+            case F_CSV:
+                return MakeHolder<TCSVReporter>();
+
+            case F_JSON:
+                return MakeHolder<TJSONReporter>();
+
+            default:
+                break;
+        }
+
+        return MakeHolder<TConsoleReporter>(); // make compiler happy
+    }
+
+    // Wraps the format-specific reporter so results come out in test order.
+    THolder<IReporter> MakeOrderedReporter(const EOutFormat type) {
+        return MakeHolder<TOrderedReporter>(MakeReporter(type));
+    }
+
+    // Assigns zero-based sequential ids used by TOrderedReporter.
+    void EnumerateTests(TVector<ITestRunner*>& tests) {
+        for (size_t id : xrange(tests.size())) {
+            tests[id]->SequentialId = id;
+        }
+    }
+}
+
+// Parses a --format value ("console"/"csv"/"json"); throws
+// TFromStringException for anything else.
+template <>
+EOutFormat FromStringImpl<EOutFormat>(const char* data, size_t len) {
+    const auto s = TStringBuf{data, len};
+
+    if (TStringBuf("console") == s) {
+        return F_CONSOLE;
+    } else if (TStringBuf("csv") == s) {
+        return F_CSV;
+    } else if (TStringBuf("json") == s) {
+        return F_JSON;
+    }
+
+    ythrow TFromStringException{} << "failed to convert '" << s << '\'';
+}
+
+// Human-readable (console) rendering of one benchmark result; this is the
+// format shown in README.md.
+template <>
+void Out<TResult>(IOutputStream& out, const TResult& r) {
+    out << "----------- " << LightRed() << r.TestName << Old() << " ---------------" << Endl
+        << "  samples:       " << White() << r.Samples << Old() << Endl
+        << "  iterations:    " << White() << r.Iterations << Old() << Endl
+        << "  iterations hr: " << White() << HumanReadableSize(r.Iterations, SF_QUANTITY) << Old() << Endl
+        << "  run time:      " << White() << r.RunTime << Old() << Endl;
+
+    if (r.CyclesPerIteration) {
+        out << "  per iteration: " << White() << TCycleTimer::FmtTime(*r.CyclesPerIteration) << Old() << Endl;
+    }
+
+    if (r.SecondsPerIteration) {
+        out << "  per iteration: " << White() << DoFmtTime(*r.SecondsPerIteration) << Old() << Endl;
+    }
+}
+
+// Placement-constructs the benchmark runner inside the registrar's inline
+// buffer; the object self-registers and intentionally lives, undestroyed,
+// for the whole program lifetime. The static_assert reserves room for the
+// worst-case alignment adjustment done by AlignUp.
+NCpu::TRegistar::TRegistar(const char* name, TUserFunc func) {
+    static_assert(sizeof(TCpuBenchmark) + alignof(TCpuBenchmark) < sizeof(Buf), "fix Buf size");
+
+    new (AlignUp(Buf, alignof(TCpuBenchmark))) TCpuBenchmark(name, func);
+}
+
+namespace {
+    // Parsed command-line options of the benchmark runner.
+    struct TProgOpts {
+        TProgOpts(int argc, char** argv) {
+            TOpts opts = TOpts::Default();
+
+            opts.AddHelpOption();
+
+            opts.AddLongOption('b', "budget")
+                .StoreResult(&TimeBudget)
+                .RequiredArgument("SEC")
+                .Optional()
+                .Help("overall time budget");
+
+            opts.AddLongOption('l', "list")
+                .NoArgument()
+                .StoreValue(&ListTests, true)
+                .Help("list all tests");
+
+            opts.AddLongOption('t', "threads")
+                .StoreResult(&Threads)
+                .OptionalValue(ToString((NSystemInfo::CachedNumberOfCpus() + 1) / 2), "JOBS")
+                .DefaultValue("1")
+                .Help("run benchmarks in parallel");
+
+            opts.AddLongOption('f', "format")
+                .AddLongName("benchmark_format")
+                .StoreResult(&OutFormat)
+                .RequiredArgument("FORMAT")
+                .DefaultValue("console")
+                .Help("output format (console|csv|json)");
+
+            opts.SetFreeArgDefaultTitle("REGEXP", "RE2 regular expression to filter tests");
+
+            const TOptsParseResult parseResult{&opts, argc, argv};
+
+            // Free arguments are RE2 name filters; an invalid expression
+            // aborts startup via Y_ENSURE.
+            for (const auto& regexp : parseResult.GetFreeArgs()) {
+                Filters.push_back(MakeHolder<RE2>(regexp.data(), RE2::Quiet));
+                Y_ENSURE(Filters.back()->ok(), "incorrect RE2 expression '" << regexp << "'");
+            }
+        }
+
+        // True when `name` passes the filter list; an empty filter list
+        // matches everything. Filters must match the full name.
+        bool MatchFilters(const TStringBuf& name) const {
+            if (!Filters) {
+                return true;
+            }
+
+            for (auto&& re : Filters) {
+                if (RE2::FullMatchN({name.data(), name.size()}, *re, nullptr, 0)) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        bool ListTests = false;
+        double TimeBudget = -1.0; // seconds; negative means "use the default"
+        TVector<THolder<RE2>> Filters;
+        size_t Threads = 0;
+        EOutFormat OutFormat;
+    };
+}
+
+// Entry point: select, order, run and report the registered benchmarks.
+int NBench::Main(int argc, char** argv) {
+    const TProgOpts opts(argc, argv);
+
+    // Collect registered benchmarks that pass the name filters, then assign
+    // sequential ids for ordered reporting.
+    TVector<ITestRunner*> tests;
+
+    for (auto&& it : Tests()) {
+        if (opts.MatchFilters(it.Name())) {
+            tests.push_back(&it);
+        }
+    }
+    EnumerateTests(tests);
+
+    if (opts.ListTests) {
+        for (const auto* const it : tests) {
+            Cout << it->Name() << Endl;
+        }
+
+        return 0;
+    }
+
+    if (!tests) {
+        return 0;
+    }
+
+    double timeBudget = opts.TimeBudget;
+
+    if (timeBudget < 0) {
+        timeBudget = 5.0 * tests.size(); // default: five seconds per benchmark
+    }
+
+    // The overall budget is split evenly between the selected benchmarks.
+    const TOptions testOpts = {timeBudget / tests.size()};
+    const auto reporter = MakeOrderedReporter(opts.OutFormat);
+
+    std::function<void(ITestRunner**)> func = [&](ITestRunner** it) {
+        auto&& res = (*it)->Run(testOpts);
+
+        reporter->Report(std::move(res));
+    };
+
+    if (opts.Threads > 1) {
+        // Parallel run; TOrderedReporter restores deterministic output order.
+        NYmp::SetThreadCount(opts.Threads);
+        NYmp::ParallelForStaticChunk(tests.data(), tests.data() + tests.size(), 1, func);
+    } else {
+        for (auto it : tests) {
+            func(&it);
+        }
+    }
+
+    reporter->Finish();
+
+    return 0;
+}
diff --git a/library/cpp/testing/benchmark/bench.h b/library/cpp/testing/benchmark/bench.h
new file mode 100644
index 00000000000..21551ad0dd4
--- /dev/null
+++ b/library/cpp/testing/benchmark/bench.h
@@ -0,0 +1,92 @@
+#pragma once
+
+#include <util/system/compiler.h>
+#include <util/system/types.h>
+
+#include <utility>
+
+namespace NBench {
+    namespace NCpu {
+        // Iteration descriptor passed to a Y_CPU_BENCHMARK body.
+        struct TParams {
+            inline size_t Iterations() const noexcept {
+                return Iterations_;
+            }
+
+            const size_t Iterations_; // iterations the body is asked to perform
+        };
+
+        // Signature of a user benchmark body.
+        using TUserFunc = void(TParams&);
+
+        // Registers a benchmark at static-initialization time; the runner
+        // object is placement-constructed into Buf (see bench.cpp).
+        struct TRegistar {
+            TRegistar(const char* name, TUserFunc func);
+
+            char Buf[128]; // storage for the aligned runner object
+        };
+    }
+
+    /**
+     * Function that states "I can read and write everywhere in memory".
+     *
+     * Use it to prevent the optimizer from reordering or discarding memory
+     * writes prior to its call, and to force memory reads after its call.
+     */
+    void Clobber();
+
+    /**
+     * Forces whatever `p` points to to stay in memory and not in a register.
+     *
+     * @param p Pointer to data.
+     */
+    template <typename T>
+    void Escape(T* p);
+
+#if defined(__GNUC__)
+    Y_FORCE_INLINE void Clobber() {
+        asm volatile(""
+                     :
+                     :
+                     : "memory");
+    }
+#elif defined(_MSC_VER)
+    Y_FORCE_INLINE void Clobber() {
+        _ReadWriteBarrier();
+    }
+
+#else
+    // No known compiler barrier available: falls back to a no-op.
+    Y_FORCE_INLINE void Clobber() {
+    }
+#endif
+
+#if defined(__GNUC__)
+    template <typename T>
+    Y_FORCE_INLINE void Escape(T* p) {
+        asm volatile(""
+                     :
+                     : "g"(p)
+                     : "memory");
+    }
+#else
+    // Without inline-asm support Escape degrades to a no-op.
+    template <typename T>
+    Y_FORCE_INLINE void Escape(T*) {
+    }
+#endif
+
+    /**
+     * Use this function to prevent unused variables elimination.
+     *
+     * @param datum Unused variable (e.g. return value of benchmarked function).
+     */
+    // NOTE(review): relies on ::DoNotOptimizeAway, which is not declared by
+    // this header's includes — presumably provided by util; confirm.
+    template <typename T>
+    Y_FORCE_INLINE void DoNotOptimize(T&& datum) {
+        ::DoNotOptimizeAway(std::forward<T>(datum));
+    }
+
+    // Benchmark-runner entry point (invoked from main/main.cpp).
+    int Main(int argc, char** argv);
+}
+
+// Declares and registers a CPU benchmark. `name` is the identifier used in
+// reports and command-line filtering; `cnt` names the NBench::NCpu::TParams&
+// parameter visible inside the body — iterate cnt.Iterations() times.
+#define Y_CPU_BENCHMARK(name, cnt)                              \
+    namespace N_bench_##name {                                  \
+        static void Run(::NBench::NCpu::TParams&);              \
+        const ::NBench::NCpu::TRegistar benchmark(#name, &Run); \
+    }                                                           \
+    static void N_bench_##name::Run(::NBench::NCpu::TParams& cnt)
diff --git a/library/cpp/testing/benchmark/dummy.cpp b/library/cpp/testing/benchmark/dummy.cpp
new file mode 100644
index 00000000000..cdb05c19db2
--- /dev/null
+++ b/library/cpp/testing/benchmark/dummy.cpp
@@ -0,0 +1,8 @@
+#include "bench.h"
+
+namespace NBench {
+    namespace NPrivate {
+        // Opaque sink: a call the optimizer cannot prove side-effect free,
+        // used to keep values alive. NOTE(review): presumably referenced by
+        // the DoNotOptimizeAway machinery elsewhere in util — confirm.
+        void UseCharPointer(volatile const char*) {
+        }
+    }
+}
diff --git a/library/cpp/testing/benchmark/examples/main.cpp b/library/cpp/testing/benchmark/examples/main.cpp
new file mode 100644
index 00000000000..ddd8b05ffc7
--- /dev/null
+++ b/library/cpp/testing/benchmark/examples/main.cpp
@@ -0,0 +1,215 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/xrange.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/generic/bt_exception.h>
+
+// Measures TVector<size_t> reserve + push_back of Iterations elements.
+Y_CPU_BENCHMARK(F, iface) {
+    TVector<size_t> x;
+
+    x.reserve(iface.Iterations());
+
+    for (size_t i = 0; i < iface.Iterations(); ++i) {
+        x.push_back(i);
+    }
+}
+
+// Empty body: measures the harness overhead itself.
+Y_CPU_BENCHMARK(EmptyF, iface) {
+    (void)iface;
+}
+
+// One tiny vector allocation per call (not per iteration).
+Y_CPU_BENCHMARK(AlmostEmptyF, iface) {
+    (void)iface;
+
+    TVector<size_t> x;
+    x.resize(1);
+}
+
+// Cost of throw/catch without formatting the exception message.
+Y_CPU_BENCHMARK(TestThrow, iface) {
+    for (size_t i = 0; i < iface.Iterations(); ++i) {
+        try {
+            ythrow yexception() << i;
+        } catch (...) {
+            //CurrentExceptionMessage();
+        }
+    }
+}
+
+// Cost of throw/catch when the exception also captures a backtrace.
+Y_CPU_BENCHMARK(TestThrowBT, iface) {
+    for (size_t i = 0; i < iface.Iterations(); ++i) {
+        try {
+            ythrow TWithBackTrace<yexception>() << i;
+        } catch (...) {
+            //CurrentExceptionMessage();
+        }
+    }
+}
+
+// Cost of throw/catch plus formatting the exception message.
+Y_CPU_BENCHMARK(TestThrowCatch, iface) {
+    for (size_t i = 0; i < iface.Iterations(); ++i) {
+        try {
+            ythrow yexception() << i;
+        } catch (...) {
+            Y_DO_NOT_OPTIMIZE_AWAY(CurrentExceptionMessage());
+        }
+    }
+}
+
+// Cost of throw/catch with backtrace capture plus message formatting.
+Y_CPU_BENCHMARK(TestThrowCatchBT, iface) {
+    for (size_t i = 0; i < iface.Iterations(); ++i) {
+        try {
+            ythrow TWithBackTrace<yexception>() << i;
+        } catch (...) {
+            Y_DO_NOT_OPTIMIZE_AWAY(CurrentExceptionMessage());
+        }
+    }
+}
+
+// Deliberately noisy benchmark (sleeps for some iteration counts) to
+// exercise the harness's outlier filtering.
+Y_CPU_BENCHMARK(TestRobust, iface) {
+    if (iface.Iterations() % 100 == 0) {
+        usleep(100000);
+    }
+}
+
+// Cost of a plain counted loop.
+Y_CPU_BENCHMARK(IterationSpeed, iface) {
+    const auto n = iface.Iterations();
+
+    for (size_t i = 0; i < n; ++i) {
+        Y_DO_NOT_OPTIMIZE_AWAY(i);
+    }
+}
+
+// Cost of an xrange-driven loop (compare against IterationSpeed).
+Y_CPU_BENCHMARK(XRangeSpeed, iface) {
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        Y_DO_NOT_OPTIMIZE_AWAY(i);
+    }
+}
+
+// Non-inlined helpers for the function-call-cost benchmarks below:
+// FFF* take ints; FS1* take TStringBuf by value, FS2* by const reference.
+Y_NO_INLINE int FFF() {
+    return 0;
+}
+
+Y_NO_INLINE int FFF(int x) {
+    return x;
+}
+
+Y_NO_INLINE int FFF(int x, int y) {
+    return x + y;
+}
+
+Y_NO_INLINE size_t FS1(TStringBuf x) {
+    return x.size();
+}
+
+Y_NO_INLINE size_t FS1_2(TStringBuf x, TStringBuf y) {
+    return x.size() + y.size();
+}
+
+Y_NO_INLINE size_t FS2(const TStringBuf& x) {
+    return x.size();
+}
+
+Y_NO_INLINE size_t FS2_2(const TStringBuf& x, const TStringBuf& y) {
+    return x.size() + y.size();
+}
+
+// Call cost: one TStringBuf by value; Escape/Clobber keep the call alive
+// (contrast with Incorrect_FunctionCallCost_StringBufVal1 below).
+Y_CPU_BENCHMARK(FunctionCallCost_StringBufVal1, iface) {
+    TStringBuf x;
+
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        (void)i;
+        NBench::Escape(&x);
+        Y_DO_NOT_OPTIMIZE_AWAY(FS1(x));
+        NBench::Clobber();
+    }
+}
+
+// Call cost: one TStringBuf by const reference.
+Y_CPU_BENCHMARK(FunctionCallCost_StringBufRef1, iface) {
+    TStringBuf x;
+
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        (void)i;
+        NBench::Escape(&x);
+        Y_DO_NOT_OPTIMIZE_AWAY(FS2(x));
+        NBench::Clobber();
+    }
+}
+
+// Call cost: two TStringBuf arguments by value.
+Y_CPU_BENCHMARK(FunctionCallCost_StringBufVal2, iface) {
+    TStringBuf x;
+    TStringBuf y;
+
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        (void)i;
+        NBench::Escape(&x);
+        NBench::Escape(&y);
+        Y_DO_NOT_OPTIMIZE_AWAY(FS1_2(x, y));
+        NBench::Clobber();
+    }
+}
+
+// Call cost: two TStringBuf arguments by const reference.
+Y_CPU_BENCHMARK(FunctionCallCost_StringBufRef2, iface) {
+    TStringBuf x;
+    TStringBuf y;
+
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        (void)i;
+        NBench::Escape(&x);
+        NBench::Escape(&y);
+        Y_DO_NOT_OPTIMIZE_AWAY(FS2_2(x, y));
+        NBench::Clobber();
+    }
+}
+
+// Call cost: no arguments.
+Y_CPU_BENCHMARK(FunctionCallCost_NoArg, iface) {
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        (void)i;
+        Y_DO_NOT_OPTIMIZE_AWAY(FFF());
+    }
+}
+
+// Call cost: one int argument. NOTE(review): passes size_t into FFF(int) —
+// a narrowing conversion; presumably harmless here.
+Y_CPU_BENCHMARK(FunctionCallCost_OneArg, iface) {
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        Y_DO_NOT_OPTIMIZE_AWAY(FFF(i));
+    }
+}
+
+// Call cost: two int arguments (same narrowing note as OneArg).
+Y_CPU_BENCHMARK(FunctionCallCost_TwoArg, iface) {
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        Y_DO_NOT_OPTIMIZE_AWAY(FFF(i, i));
+    }
+}
+
+/* An example of incorrect benchmark. As of r2581591 Clang 3.7 produced following assembly:
+ * @code
+ * │ push %rbp
+ * │ mov %rsp,%rbp
+ * │ push %rbx
+ * │ push %rax
+ * │ mov (%rdi),%rbx
+ * │ test %rbx,%rbx
+ * │ ↓ je 25
+ * │ xor %edi,%edi
+ * │ xor %esi,%esi
+ * │ → callq FS1(TBasicStringBuf<char, std::char_traits<char
+ * │ nop
+ * 100.00 │20:┌─→dec %rbx
+ * │ └──jne 20
+ * │25: add $0x8,%rsp
+ * │ pop %rbx
+ * │ pop %rbp
+ * │ ← retq
+ * @endcode
+ *
+ * So, this benchmark is measuring empty loop!
+ */
+// Deliberately broken (see the explanatory comment above): without
+// Escape/Clobber the compiler hoists the call and an empty loop is timed.
+Y_CPU_BENCHMARK(Incorrect_FunctionCallCost_StringBufVal1, iface) {
+    TStringBuf x;
+
+    for (auto i : xrange<size_t>(0, iface.Iterations())) {
+        (void)i;
+        Y_DO_NOT_OPTIMIZE_AWAY(FS1(x));
+    }
+}
diff --git a/library/cpp/testing/benchmark/examples/metrics/main.py b/library/cpp/testing/benchmark/examples/metrics/main.py
new file mode 100644
index 00000000000..8f9d9d06aeb
--- /dev/null
+++ b/library/cpp/testing/benchmark/examples/metrics/main.py
@@ -0,0 +1,7 @@
+import yatest.common as yc
+
+
+def test_export_metrics(metrics):
+    """Run the examples benchmark binary with 8 threads and export its results as test metrics."""
+    metrics.set_benchmark(yc.execute_benchmark(
+        'library/cpp/testing/benchmark/examples/examples',
+        threads=8))
diff --git a/library/cpp/testing/benchmark/examples/metrics/ya.make b/library/cpp/testing/benchmark/examples/metrics/ya.make
new file mode 100644
index 00000000000..a9dbdca9fad
--- /dev/null
+++ b/library/cpp/testing/benchmark/examples/metrics/ya.make
@@ -0,0 +1,20 @@
+OWNER(
+ pg
+ yazevnul
+)
+
+PY2TEST()
+
+SIZE(LARGE)
+
+TAG(
+ ya:force_sandbox
+ sb:intel_e5_2660v1
+ ya:fat
+)
+
+TEST_SRCS(main.py)
+
+DEPENDS(library/cpp/testing/benchmark/examples)
+
+END()
diff --git a/library/cpp/testing/benchmark/examples/ya.make b/library/cpp/testing/benchmark/examples/ya.make
new file mode 100644
index 00000000000..7e696e127ab
--- /dev/null
+++ b/library/cpp/testing/benchmark/examples/ya.make
@@ -0,0 +1,12 @@
+OWNER(
+ pg
+ yazevnul
+)
+
+Y_BENCHMARK()
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/library/cpp/testing/benchmark/main/main.cpp b/library/cpp/testing/benchmark/main/main.cpp
new file mode 100644
index 00000000000..aabcb89c435
--- /dev/null
+++ b/library/cpp/testing/benchmark/main/main.cpp
@@ -0,0 +1,16 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/yexception.h>
+#include <util/stream/output.h>
+
+#include <cstdlib>
+
+// Delegates to the library runner; any escaped exception is reported to
+// stderr and turned into EXIT_FAILURE.
+int main(int argc, char** argv) {
+    try {
+        return NBench::Main(argc, argv);
+    } catch (...) {
+        Cerr << CurrentExceptionMessage() << Endl;
+    }
+
+    return EXIT_FAILURE;
+}
diff --git a/library/cpp/testing/benchmark/main/ya.make b/library/cpp/testing/benchmark/main/ya.make
new file mode 100644
index 00000000000..d00cdcf9fca
--- /dev/null
+++ b/library/cpp/testing/benchmark/main/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+
+OWNER(
+ pg
+ yazevnul
+)
+
+SRCS(
+ GLOBAL main.cpp
+)
+
+PEERDIR(
+ library/cpp/testing/benchmark
+)
+
+END()
diff --git a/library/cpp/testing/benchmark/ya.make b/library/cpp/testing/benchmark/ya.make
new file mode 100644
index 00000000000..f42be80698e
--- /dev/null
+++ b/library/cpp/testing/benchmark/ya.make
@@ -0,0 +1,22 @@
+LIBRARY()
+
+OWNER(
+ pg
+ yazevnul
+)
+
+SRCS(
+ bench.cpp
+ dummy.cpp
+)
+
+PEERDIR(
+ contrib/libs/re2
+ library/cpp/colorizer
+ library/cpp/getopt/small
+ library/cpp/json
+ library/cpp/linear_regression
+ library/cpp/threading/poor_man_openmp
+)
+
+END()