diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/coroutine/engine/stack/benchmark | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/coroutine/engine/stack/benchmark')
-rw-r--r-- | library/cpp/coroutine/engine/stack/benchmark/alloc_bm.cpp | 316 | ||||
-rw-r--r-- | library/cpp/coroutine/engine/stack/benchmark/ya.make | 13 |
2 files changed, 329 insertions, 0 deletions
diff --git a/library/cpp/coroutine/engine/stack/benchmark/alloc_bm.cpp b/library/cpp/coroutine/engine/stack/benchmark/alloc_bm.cpp
new file mode 100644
index 0000000000..38d713d274
--- /dev/null
+++ b/library/cpp/coroutine/engine/stack/benchmark/alloc_bm.cpp
@@ -0,0 +1,316 @@
+#include <benchmark/benchmark.h>
+
+#include <util/generic/vector.h>
+#include <util/system/yassert.h>
+
+#include <library/cpp/coroutine/engine/stack/stack_allocator.h>
+#include <library/cpp/coroutine/engine/stack/stack_guards.h>
+#include <library/cpp/coroutine/engine/stack/stack_pool.h>
+#include <library/cpp/coroutine/engine/stack/stack_utils.h>
+
+
+// Benchmarks for coroutine stack allocation. Each BM_* function below is registered
+// with BENCHMARK(...); the values given to ->Args({...}) are read back inside the
+// function through state.range(N):
+//   range(0) - guard kind, cast to EGuard;
+//   range(1) - second constructor argument of TStackHolder (BigCoroSize / SmallCoroSize);
+//   range(2) - (pool benchmarks only) value assigned to TPoolAllocatorSettings::StacksPerChunk.
+namespace NCoro::NStack::NBenchmark {
+
+    // Name passed as the third constructor argument to every TStackHolder in this file.
+    const char* TestCoroName = "any_name";
+    // The two stack sizes fed to the benchmarks as range(1): 25 * PageSize and 4 * PageSize.
+    constexpr uint64_t BigCoroSize = PageSize * 25;
+    constexpr uint64_t SmallCoroSize = PageSize * 4;
+    // Number of TStackHolder objects created per inner loop in the *ManyStacks* benchmarks.
+    constexpr uint64_t ManyStacks = 4096;
+
+    // Minimal per-stack work done by most benchmarks: aborts via Y_VERIFY if
+    // stack.Get() is empty, then calls both canary checks.
+    // NOTE(review): the results of LowerCanaryOk() / UpperCanaryOk() are discarded here.
+    void BasicOperations(TStackHolder& stack) {
+        Y_VERIFY(!stack.Get().empty());
+        stack.LowerCanaryOk();
+        stack.UpperCanaryOk();
+    }
+
+    // Same checks as BasicOperations(), followed by storing the value 42 at offsets
+    // PageSize / 2, PageSize / 2 + 2 * PageSize, ... while the offset is below memory.size().
+    // Presumably the stores are there to touch the stack's pages so the
+    // "MemoryWrite" benchmarks include that cost - TODO confirm.
+    void WriteStack(TStackHolder& stack) {
+        auto memory = stack.Get();
+        Y_VERIFY(!memory.empty());
+        stack.LowerCanaryOk();
+        stack.UpperCanaryOk();
+        for (uint64_t i = PageSize / 2; i < memory.size(); i += PageSize * 2) {
+            memory[i] = 42;
+        }
+    }
+
+    // Each iteration calls GetAlignedMemory(range(0), raw, aligned) and, when it
+    // returns true, releases the result with free(raw).
+    // Registered for range(0) from 1 to 1024 * 1024 with a range multiplier of 16.
+    // NOTE(review): the unit of the first argument is defined by GetAlignedMemory - confirm.
+    static void BM_GetAlignedMemory(benchmark::State& state) {
+        char* raw = nullptr;
+        char* aligned = nullptr;
+        for (auto _ : state) {
+            if (NCoro::NStack::GetAlignedMemory(state.range(0), raw, aligned)) {
+                free(raw);
+            }
+        }
+    }
+    BENCHMARK(BM_GetAlignedMemory)->RangeMultiplier(16)->Range(1, 1024 * 1024);
+
+    // Same as BM_GetAlignedMemory, but calls ReleaseRss(aligned, toFree) before free(raw).
+    // toFree is range(0) - 2 when range(0) > 2, otherwise 1.
+    static void BM_GetAlignedMemoryReleaseRss(benchmark::State& state) {
+        char* raw = nullptr;
+        char* aligned = nullptr;
+        for (auto _ : state) {
+            if (NCoro::NStack::GetAlignedMemory(state.range(0), raw, aligned)) {
+                const auto toFree = state.range(0) > 2 ? state.range(0) - 2 : 1;
+                ReleaseRss(aligned, toFree);
+                free(raw);
+            }
+        }
+    }
+    BENCHMARK(BM_GetAlignedMemoryReleaseRss)->RangeMultiplier(16)->Range(1, 1024 * 1024);
+
+    // One TStackHolder per iteration, built on an allocator obtained with
+    // default-constructed TPoolAllocatorSettings. The holder is a loop-body local,
+    // so it is destroyed at the end of every iteration.
+    static void BM_PoolAllocator(benchmark::State& state) {
+        auto allocator = GetAllocator(TPoolAllocatorSettings{}, (EGuard)state.range(0));
+        for (auto _ : state) {
+            TStackHolder stack(*allocator, state.range(1), TestCoroName);
+            BasicOperations(stack);
+        }
+    }
+    BENCHMARK(BM_PoolAllocator)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+    // Same as BM_PoolAllocator, but the allocator is obtained with Nothing()
+    // in place of pool settings.
+    static void BM_DefaultAllocator(benchmark::State& state) {
+        auto allocator = GetAllocator(Nothing(), (EGuard)state.range(0));
+        for (auto _ : state) {
+            TStackHolder stack(*allocator, state.range(1), TestCoroName);
+            BasicOperations(stack);
+        }
+    }
+    BENCHMARK(BM_DefaultAllocator)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+    // Per iteration: ManyStacks holders are created and destroyed one after another,
+    // so at most one holder from this loop is alive at a time.
+    // Pool settings take StacksPerChunk from range(2).
+    static void BM_PoolAllocatorManyStacksOneAtTime(benchmark::State& state) {
+        TPoolAllocatorSettings settings;
+        settings.StacksPerChunk = state.range(2);
+        auto allocator = GetAllocator(settings, (EGuard)state.range(0));
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                TStackHolder stack(*allocator, state.range(1), TestCoroName);
+                BasicOperations(stack);
+            }
+        }
+    }
+    BENCHMARK(BM_PoolAllocatorManyStacksOneAtTime)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1024});
+
+    // Same as BM_PoolAllocatorManyStacksOneAtTime, with an allocator obtained via Nothing().
+    static void BM_DefaultAllocatorManyStacksOneAtTime(benchmark::State& state) {
+        auto allocator = GetAllocator(Nothing(), (EGuard)state.range(0));
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                TStackHolder stack(*allocator, state.range(1), TestCoroName);
+                BasicOperations(stack);
+            }
+        }
+    }
+    BENCHMARK(BM_DefaultAllocatorManyStacksOneAtTime)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+    // Per iteration: ManyStacks holders are appended to `stacks` and kept alive.
+    // NOTE(review): `stacks` is never cleared inside this function, so from the second
+    // iteration on it grows beyond the ManyStacks elements reserved up front and the
+    // number of live holders increases with every iteration - confirm this is intended.
+    static void BM_PoolAllocatorManyStacks(benchmark::State& state) {
+        TPoolAllocatorSettings settings;
+        settings.StacksPerChunk = state.range(2);
+        auto allocator = GetAllocator(settings, (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.emplace_back(*allocator, state.range(1), TestCoroName);
+                BasicOperations(stacks.back());
+            }
+        }
+    }
+    BENCHMARK(BM_PoolAllocatorManyStacks)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1024});
+
+    // Same as BM_PoolAllocatorManyStacks, with an allocator obtained via Nothing().
+    // Holders are appended with push_back of a temporary instead of emplace_back.
+    // NOTE(review): `stacks` is never cleared here either, so it keeps growing
+    // across iterations - confirm this is intended.
+    static void BM_DefaultAllocatorManyStacks(benchmark::State& state) {
+        auto allocator = GetAllocator(Nothing(), (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.push_back(TStackHolder(*allocator, state.range(1), TestCoroName));
+                BasicOperations(stacks.back());
+            }
+        }
+    }
+    BENCHMARK(BM_DefaultAllocatorManyStacks)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+    // ------------------------------------------------------------------------
+    // Per iteration: ManyStacks holders are appended to `stacks`, then stacks.clear()
+    // destroys all of them, so every iteration starts from an empty vector.
+    static void BM_PoolAllocatorManyStacksReleased(benchmark::State& state) {
+        TPoolAllocatorSettings settings;
+        settings.StacksPerChunk = state.range(2);
+        auto allocator = GetAllocator(settings, (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.emplace_back(*allocator, state.range(1), TestCoroName);
+                BasicOperations(stacks.back());
+            }
+            stacks.clear();
+        }
+    }
+    BENCHMARK(BM_PoolAllocatorManyStacksReleased)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1024});
+
+    // Same as BM_PoolAllocatorManyStacksReleased, with an allocator obtained via Nothing().
+    static void BM_DefaultAllocatorManyStacksReleased(benchmark::State& state) {
+        auto allocator = GetAllocator(Nothing(), (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.push_back(TStackHolder(*allocator, state.range(1), TestCoroName));
+                BasicOperations(stacks.back());
+            }
+            stacks.clear();
+        }
+    }
+    BENCHMARK(BM_DefaultAllocatorManyStacksReleased)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+    // ------------------------------------------------------------------------
+    // Per iteration: append ManyStacks holders, clear the vector, then append
+    // ManyStacks holders again.
+    // NOTE(review): the second batch is not cleared at the end of the iteration, so
+    // every iteration after the first begins with ManyStacks holders already in
+    // `stacks` and its first loop pushes the vector past the reserved capacity -
+    // confirm this is intended.
+    static void BM_PoolAllocatorManyStacksReleasedAndRealloc(benchmark::State& state) {
+        TPoolAllocatorSettings settings;
+        settings.StacksPerChunk = state.range(2);
+        auto allocator = GetAllocator(settings, (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.emplace_back(*allocator, state.range(1), TestCoroName);
+                BasicOperations(stacks.back());
+            }
+            stacks.clear();
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.emplace_back(*allocator, state.range(1), TestCoroName);
+                BasicOperations(stacks.back());
+            }
+        }
+    }
+    BENCHMARK(BM_PoolAllocatorManyStacksReleasedAndRealloc)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 8192})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 8192})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 8192})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 8192});
+
+    // Same as BM_PoolAllocatorManyStacksReleasedAndRealloc, with an allocator obtained
+    // via Nothing().
+    // NOTE(review): the second batch likewise stays in `stacks` when the iteration
+    // ends, so later iterations start with ManyStacks live holders - confirm this is intended.
+    static void BM_DefaultAllocatorManyStacksReleasedAndRealloc(benchmark::State& state) {
+        auto allocator = GetAllocator(Nothing(), (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.push_back(TStackHolder(*allocator, state.range(1), TestCoroName));
+                BasicOperations(stacks.back());
+            }
+            stacks.clear();
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.push_back(TStackHolder(*allocator, state.range(1), TestCoroName));
+                BasicOperations(stacks.back());
+            }
+        }
+    }
+    BENCHMARK(BM_DefaultAllocatorManyStacksReleasedAndRealloc)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+    // ------------------------------------------------------------------------
+    // Same allocate / clear / allocate sequence as
+    // BM_PoolAllocatorManyStacksReleasedAndRealloc, but each new holder goes through
+    // WriteStack() (which also stores into the stack memory) instead of BasicOperations().
+    // NOTE(review): as there, the second batch is left in `stacks` at the end of the
+    // iteration - confirm this is intended.
+    static void BM_PoolAllocatorManyStacksMemoryWriteReleasedAndRealloc(benchmark::State& state) {
+        TPoolAllocatorSettings settings;
+        settings.StacksPerChunk = state.range(2);
+        auto allocator = GetAllocator(settings, (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.emplace_back(*allocator, state.range(1), TestCoroName);
+                WriteStack(stacks.back());
+            }
+            stacks.clear();
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.emplace_back(*allocator, state.range(1), TestCoroName);
+                WriteStack(stacks.back());
+            }
+        }
+    }
+    BENCHMARK(BM_PoolAllocatorManyStacksMemoryWriteReleasedAndRealloc)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 1024})
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize, 8192})
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize, 8192})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize, 8192})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize, 8192});
+
+    // Same as BM_PoolAllocatorManyStacksMemoryWriteReleasedAndRealloc, with an
+    // allocator obtained via Nothing().
+    // NOTE(review): the second batch likewise stays in `stacks` when the iteration
+    // ends - confirm this is intended.
+    static void BM_DefaultAllocatorManyStacksMemoryWriteReleasedAndRealloc(benchmark::State& state) {
+        auto allocator = GetAllocator(Nothing(), (EGuard)state.range(0));
+        TVector<TStackHolder> stacks; // store stacks during benchmark
+        stacks.reserve(ManyStacks);
+        for (auto _ : state) {
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.push_back(TStackHolder(*allocator, state.range(1), TestCoroName));
+                WriteStack(stacks.back());
+            }
+            stacks.clear();
+            for (uint64_t i = 0; i < ManyStacks; ++i) {
+                stacks.push_back(TStackHolder(*allocator, state.range(1), TestCoroName));
+                WriteStack(stacks.back());
+            }
+        }
+    }
+    BENCHMARK(BM_DefaultAllocatorManyStacksMemoryWriteReleasedAndRealloc)
+        ->Args({(int64_t)EGuard::Canary, BigCoroSize}) // old version - ArgsProduct() is not supported
+        ->Args({(int64_t)EGuard::Canary, SmallCoroSize})
+        ->Args({(int64_t)EGuard::Page, BigCoroSize})
+        ->Args({(int64_t)EGuard::Page, SmallCoroSize});
+
+}
+
+// Expands to the benchmark binary's main(), which runs every benchmark registered above.
+BENCHMARK_MAIN();
diff --git a/library/cpp/coroutine/engine/stack/benchmark/ya.make b/library/cpp/coroutine/engine/stack/benchmark/ya.make
new file mode 100644
index 0000000000..b2942fe8ca
--- /dev/null
+++ b/library/cpp/coroutine/engine/stack/benchmark/ya.make
@@ -0,0 +1,13 @@
+G_BENCHMARK()
+
+OWNER(g:balancer)
+
+SRCS(
+    alloc_bm.cpp
+)
+
+PEERDIR(
+    library/cpp/coroutine/engine
+)
+
+END()
\ No newline at end of file |