diff options
author | aidarsamer <aidarsamer@ydb.tech> | 2023-06-08 17:25:48 +0300 |
---|---|---|
committer | aidarsamer <aidarsamer@ydb.tech> | 2023-06-08 17:25:48 +0300 |
commit | d8ffca06077f82939f880ce805c07cb7bc946755 (patch) | |
tree | a9e4d5a26a272bd37c832068e5e660931efd5cf3 | |
parent | 67979c6e5cdbfdb34dcbafc7edace699e2ba375f (diff) | |
download | ydb-d8ffca06077f82939f880ce805c07cb7bc946755.tar.gz |
Add TPC-H to YDB CLI workload
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/CMakeLists.darwin-x86_64.txt | 10 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/CMakeLists.linux-aarch64.txt | 10 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/CMakeLists.linux-x86_64.txt | 10 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/CMakeLists.windows-x86_64.txt | 10 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp | 143 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/benchmark_utils.h | 30 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/click_bench.cpp | 138 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/tpch.cpp | 384 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/tpch.h | 64 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/tpch_schema.sql | 124 | ||||
-rw-r--r-- | ydb/public/lib/ydb_cli/commands/ydb_workload.cpp | 2 |
11 files changed, 777 insertions, 148 deletions
diff --git a/ydb/public/lib/ydb_cli/commands/CMakeLists.darwin-x86_64.txt b/ydb/public/lib/ydb_cli/commands/CMakeLists.darwin-x86_64.txt index 11902c60d28..d7c15db749d 100644 --- a/ydb/public/lib/ydb_cli/commands/CMakeLists.darwin-x86_64.txt +++ b/ydb/public/lib/ydb_cli/commands/CMakeLists.darwin-x86_64.txt @@ -48,9 +48,11 @@ target_link_libraries(clicommands PUBLIC target_sources(clicommands PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/interactive_cli.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/term_io.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench.cpp - ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/kv_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_sdk_core_access.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_command.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_profile.cpp @@ -102,14 +104,16 @@ target_link_libraries(clicommands.global PUBLIC library-cpp-resource ) target_sources(clicommands.global PRIVATE - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp ) resources(clicommands.global - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp INPUTS ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_queries.sql ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_schema.sql + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch_schema.sql KEYS click_bench_queries.sql click_bench_schema.sql + tpch_schema.sql ) diff --git a/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-aarch64.txt b/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-aarch64.txt index a14760c1e32..1ae60f16044 100644 --- a/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-aarch64.txt +++ b/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-aarch64.txt @@ -49,9 +49,11 @@ target_link_libraries(clicommands PUBLIC target_sources(clicommands PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/interactive_cli.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/term_io.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench.cpp - ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/kv_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_sdk_core_access.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_command.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_profile.cpp @@ -104,14 +106,16 @@ target_link_libraries(clicommands.global PUBLIC library-cpp-resource ) target_sources(clicommands.global PRIVATE - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp ) resources(clicommands.global - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp INPUTS ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_queries.sql ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_schema.sql + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch_schema.sql KEYS click_bench_queries.sql click_bench_schema.sql + tpch_schema.sql ) diff --git a/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-x86_64.txt b/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-x86_64.txt index a14760c1e32..1ae60f16044 100644 --- a/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-x86_64.txt +++ b/ydb/public/lib/ydb_cli/commands/CMakeLists.linux-x86_64.txt @@ -49,9 +49,11 @@ target_link_libraries(clicommands PUBLIC target_sources(clicommands PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/interactive_cli.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/term_io.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench.cpp - ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/kv_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_sdk_core_access.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_command.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_profile.cpp @@ -104,14 +106,16 @@ target_link_libraries(clicommands.global PUBLIC library-cpp-resource ) target_sources(clicommands.global PRIVATE - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp ) resources(clicommands.global - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp INPUTS ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_queries.sql ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_schema.sql + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch_schema.sql KEYS click_bench_queries.sql click_bench_schema.sql + tpch_schema.sql ) diff --git a/ydb/public/lib/ydb_cli/commands/CMakeLists.windows-x86_64.txt b/ydb/public/lib/ydb_cli/commands/CMakeLists.windows-x86_64.txt index 11902c60d28..d7c15db749d 100644 --- a/ydb/public/lib/ydb_cli/commands/CMakeLists.windows-x86_64.txt +++ b/ydb/public/lib/ydb_cli/commands/CMakeLists.windows-x86_64.txt @@ -48,9 +48,11 @@ target_link_libraries(clicommands PUBLIC target_sources(clicommands PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/interactive_cli.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/interactive/term_io.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench.cpp - ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/kv_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/stock_workload.cpp + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_sdk_core_access.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_command.cpp ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/ydb_profile.cpp @@ -102,14 +104,16 @@ target_link_libraries(clicommands.global PUBLIC library-cpp-resource ) target_sources(clicommands.global PRIVATE - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp ) resources(clicommands.global - ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/b27fa204a2892655fc6f34eb84a6343b.cpp + ${CMAKE_BINARY_DIR}/ydb/public/lib/ydb_cli/commands/5be7e95ef4262fc1083e6058ef5d266e.cpp INPUTS ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_queries.sql ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/click_bench_schema.sql + ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/commands/tpch_schema.sql KEYS click_bench_queries.sql click_bench_schema.sql + tpch_schema.sql ) diff --git a/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp b/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp new file mode 100644 index 00000000000..aeb77aee2b0 --- /dev/null +++ b/ydb/public/lib/ydb_cli/commands/benchmark_utils.cpp @@ -0,0 +1,143 @@ +#include "benchmark_utils.h" + +#include <util/string/split.h> +#include <util/stream/file.h> +#include <util/folder/pathsplit.h> +#include <util/folder/path.h> + +#include <library/cpp/json/json_writer.h> + +#include <ydb/public/sdk/cpp/client/ydb_table/table.h> +#include <ydb/public/lib/yson_value/ydb_yson_value.h> + +#include <vector> + +namespace NYdb::NConsoleClient::BenchmarkUtils { + +using namespace NYdb; +using namespace NYdb::NTable; + +TTestInfo::TTestInfo(std::vector<TDuration>&& timings) + : Timings(std::move(timings)) +{ + + if (Timings.empty()) { + return; + } + + ColdTime = Timings[0]; + + if (Timings.size() > 1) { + ui32 sum = 0; + for (size_t j = 1; j < Timings.size(); ++j) { + if (Max < Timings[j]) { + Max = Timings[j]; + } + if (!Min || Min > Timings[j]) { + Min = Timings[j]; + } + sum += Timings[j].MilliSeconds(); + } + Mean = (double) sum / (double) (Timings.size() - 1); + if (Timings.size() > 2) { + double variance = 0; + for (size_t j = 1; j < Timings.size(); ++j) { + variance += (Mean - Timings[j].MilliSeconds()) * (Mean - Timings[j].MilliSeconds()); + } + variance = variance / (double) (Timings.size() - 2); + Std = sqrt(variance); + } + } +} + +TString FullTablePath(const TString& database, const TString& table) { + TPathSplitUnix prefixPathSplit(database); + prefixPathSplit.AppendComponent(table); + return prefixPathSplit.Reconstruct(); +} + + +void ThrowOnError(const TStatus& status) { + if (!status.IsSuccess()) { + ythrow yexception() << "Operation failed with status " << status.GetStatus() << ": " + << status.GetIssues().ToString(); + } +} + +bool HasCharsInString(const TString& str) { + for (auto c : str) { + if (std::isalpha(c)) { + return true; + } + } + return false; +} + +std::pair<TString, TString> ResultToYson(NTable::TScanQueryPartIterator& it) { + TStringStream out; + TStringStream err_out; + NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text, ::NYson::EYsonType::Node, true); + writer.OnBeginList(); + + for (;;) { + auto streamPart = it.ReadNext().GetValueSync(); + if (!streamPart.IsSuccess()) { + if (!streamPart.EOS()) { + err_out << streamPart.GetIssues().ToString() << Endl; + } + break; + } + + if (streamPart.HasResultSet()) { + auto result = streamPart.ExtractResultSet(); + auto columns = result.GetColumnsMeta(); + + NYdb::TResultSetParser parser(result); + while (parser.TryNextRow()) { + writer.OnListItem(); + writer.OnBeginList(); + for (ui32 i = 0; i < columns.size(); ++i) { + writer.OnListItem(); + FormatValueYson(parser.GetValue(i), writer); + } + writer.OnEndList(); + out << "\n"; + } + } + } + + writer.OnEndList(); + return {out.Str(), err_out.Str()}; +} + +std::pair<TString, TString> Execute(const TString& query, NTable::TTableClient& client) { + TStreamExecScanQuerySettings settings; + settings.CollectQueryStats(ECollectQueryStatsMode::Full); + auto it = client.StreamExecuteScanQuery(query, settings).GetValueSync(); + ThrowOnError(it); + return ResultToYson(it); +} + +NJson::TJsonValue GetQueryLabels(ui32 queryId) { + NJson::TJsonValue labels(NJson::JSON_MAP); + labels.InsertValue("query", Sprintf("Query%02u", queryId)); + return labels; +} + +NJson::TJsonValue GetSensorValue(TStringBuf sensor, TDuration& value, ui32 queryId) { + NJson::TJsonValue sensorValue(NJson::JSON_MAP); + sensorValue.InsertValue("sensor", sensor); + sensorValue.InsertValue("value", value.MilliSeconds()); + sensorValue.InsertValue("labels", GetQueryLabels(queryId)); + return sensorValue; +} + +NJson::TJsonValue GetSensorValue(TStringBuf sensor, double value, ui32 queryId) { + NJson::TJsonValue sensorValue(NJson::JSON_MAP); + sensorValue.InsertValue("sensor", sensor); + sensorValue.InsertValue("value", value); + sensorValue.InsertValue("labels", GetQueryLabels(queryId)); + return sensorValue; +} + +} // NYdb::NConsoleClient::BenchmarkUtils
\ No newline at end of file diff --git a/ydb/public/lib/ydb_cli/commands/benchmark_utils.h b/ydb/public/lib/ydb_cli/commands/benchmark_utils.h new file mode 100644 index 00000000000..e3cb1bf3814 --- /dev/null +++ b/ydb/public/lib/ydb_cli/commands/benchmark_utils.h @@ -0,0 +1,30 @@ +#pragma once + +#include <library/cpp/json/json_value.h> +#include <ydb/public/sdk/cpp/client/ydb_table/table.h> + +#include <vector> + +namespace NYdb::NConsoleClient::BenchmarkUtils { + +struct TTestInfo { + TDuration ColdTime; + TDuration Min; + TDuration Max; + double Mean = 0; + double Std = 0; + std::vector<TDuration> Timings; + + explicit TTestInfo(std::vector<TDuration>&& timings); +}; + +TString FullTablePath(const TString& database, const TString& table); +void ThrowOnError(const TStatus& status); +bool HasCharsInString(const TString& str); +std::pair<TString, TString> ResultToYson(NYdb::NTable::TScanQueryPartIterator& it); +std::pair<TString, TString> Execute(const TString& query, NTable::TTableClient& client); +NJson::TJsonValue GetQueryLabels(ui32 queryId); +NJson::TJsonValue GetSensorValue(TStringBuf sensor, TDuration& value, ui32 queryId); +NJson::TJsonValue GetSensorValue(TStringBuf sensor, double value, ui32 queryId); + +} // NYdb::NConsoleClient::BenchmarkUtils
\ No newline at end of file diff --git a/ydb/public/lib/ydb_cli/commands/click_bench.cpp b/ydb/public/lib/ydb_cli/commands/click_bench.cpp index eb06264785c..f6b3114b010 100644 --- a/ydb/public/lib/ydb_cli/commands/click_bench.cpp +++ b/ydb/public/lib/ydb_cli/commands/click_bench.cpp @@ -14,153 +14,19 @@ #include <ydb/public/lib/yson_value/ydb_yson_value.h> #include "click_bench.h" +#include "benchmark_utils.h" namespace NYdb::NConsoleClient { using namespace NYdb; using namespace NYdb::NTable; +using namespace NYdb::NConsoleClient::BenchmarkUtils; namespace { static const char DefaultTablePath[] = "clickbench/hits"; -struct TTestInfo { - TDuration ColdTime; - TDuration Min; - TDuration Max; - double Mean = 0; - double Std = 0; - std::vector<TDuration> Timings; - - explicit TTestInfo(std::vector<TDuration>&& timings) - : Timings(std::move(timings)) - { - - if (Timings.empty()) { - return; - } - - ColdTime = Timings[0]; - - if (Timings.size() > 1) { - ui32 sum = 0; - for (size_t j = 1; j < Timings.size(); ++j) { - if (Max < Timings[j]) { - Max = Timings[j]; - } - if (!Min || Min > Timings[j]) { - Min = Timings[j]; - } - sum += Timings[j].MilliSeconds(); - } - Mean = (double) sum / (double) (Timings.size() - 1); - if (Timings.size() > 2) { - double variance = 0; - for (size_t j = 1; j < Timings.size(); ++j) { - variance += (Mean - Timings[j].MilliSeconds()) * (Mean - Timings[j].MilliSeconds()); - } - variance = variance / (double) (Timings.size() - 2); - Std = sqrt(variance); - } - } - } -}; - -TString FullTablePath(const TString& database, const TString& table) { - TPathSplitUnix prefixPathSplit(database); - prefixPathSplit.AppendComponent(table); - return prefixPathSplit.Reconstruct(); -} - - -static void ThrowOnError(const TStatus& status) { - if (!status.IsSuccess()) { - ythrow yexception() << "Operation failed with status " << status.GetStatus() << ": " - << status.GetIssues().ToString(); - } -} - -static bool HasCharsInString(const TString& str) { - for (auto c : str) { - if (std::isalpha(c)) { - return true; - } - } - return false; -} - -static std::pair<TString, TString> ResultToYson(NTable::TScanQueryPartIterator& it) { - TStringStream out; - TStringStream err_out; - NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text, ::NYson::EYsonType::Node, true); - writer.OnBeginList(); - - for (;;) { - auto streamPart = it.ReadNext().GetValueSync(); - if (!streamPart.IsSuccess()) { - if (!streamPart.EOS()) { - err_out << streamPart.GetIssues().ToString() << Endl; - } - break; - } - - if (streamPart.HasResultSet()) { - auto result = streamPart.ExtractResultSet(); - auto columns = result.GetColumnsMeta(); - - NYdb::TResultSetParser parser(result); - while (parser.TryNextRow()) { - writer.OnListItem(); - writer.OnBeginList(); - for (ui32 i = 0; i < columns.size(); ++i) { - writer.OnListItem(); - FormatValueYson(parser.GetValue(i), writer); - } - writer.OnEndList(); - out << "\n"; - } - } - } - - writer.OnEndList(); - return {out.Str(), err_out.Str()}; -} - -static std::pair<TString, TString> Execute(const TString& query, NTable::TTableClient& client) { - TStreamExecScanQuerySettings settings; - settings.CollectQueryStats(ECollectQueryStatsMode::Full); - auto it = client.StreamExecuteScanQuery(query, settings).GetValueSync(); - ThrowOnError(it); - return ResultToYson(it); -} - -static NJson::TJsonValue GetQueryLabels(ui32 queryId) { - NJson::TJsonValue labels(NJson::JSON_MAP); - labels.InsertValue("query", Sprintf("Query%02u", queryId)); - return labels; -} - -static NJson::TJsonValue GetSensorValue(TStringBuf sensor, TDuration& value, ui32 queryId) { - NJson::TJsonValue sensorValue(NJson::JSON_MAP); - sensorValue.InsertValue("sensor", sensor); - sensorValue.InsertValue("value", value.MilliSeconds()); - sensorValue.InsertValue("labels", GetQueryLabels(queryId)); - return sensorValue; -} - -static NJson::TJsonValue GetSensorValue(TStringBuf sensor, double value, ui32 queryId) { - NJson::TJsonValue sensorValue(NJson::JSON_MAP); - sensorValue.InsertValue("sensor", sensor); - sensorValue.InsertValue("value", value); - sensorValue.InsertValue("labels", GetQueryLabels(queryId)); - return sensorValue; -} - -} - -namespace { - class TExternalVariable { private: TString Id; diff --git a/ydb/public/lib/ydb_cli/commands/tpch.cpp b/ydb/public/lib/ydb_cli/commands/tpch.cpp new file mode 100644 index 00000000000..bdf1956408d --- /dev/null +++ b/ydb/public/lib/ydb_cli/commands/tpch.cpp @@ -0,0 +1,384 @@ +#include "tpch.h" + +#include <util/string/split.h> +#include <util/stream/file.h> +#include <util/string/strip.h> +#include <util/string/join.h> +#include <util/string/printf.h> +#include <util/folder/pathsplit.h> +#include <util/folder/path.h> + +#include <library/cpp/json/json_writer.h> + +#include <ydb/public/sdk/cpp/client/ydb_table/table.h> + +#include "benchmark_utils.h" + + +namespace NYdb::NConsoleClient { + +using namespace NYdb; +using namespace NYdb::NTable; +using namespace NYdb::NConsoleClient::BenchmarkUtils; + +namespace { + int getQueryNumber(int queryN) { + return queryN + 1; + } +} + + +TVector<TString> TTpchCommandRun::GetQueries() const { + TVector<TString> queries; + TFsPath queriesDir(ExternalQueriesDir); + TVector<TString> queriesList; + queriesDir.ListNames(queriesList); + std::sort(queriesList.begin(), queriesList.end(), [](const TString& l, const TString& r) { + auto leftNum = l.substr(1); + auto rightNum = r.substr(1); + return std::stoi(leftNum) < std::stoi(rightNum); + }); + for (auto&& queryFileName : queriesList) { + const TString expectedFileName = "q" + ::ToString(getQueryNumber(queries.size())) + ".sql"; + Y_VERIFY(queryFileName == expectedFileName, "incorrect files naming. have to be q<number>.sql where number in [1, N], where N is requests count"); + TFileInput fInput(ExternalQueriesDir + "/" + expectedFileName); + queries.emplace_back(fInput.ReadAll()); + } + return queries; +} + +bool TTpchCommandRun::RunBench(TConfig& config) +{ + TOFStream outFStream{OutFilePath}; + + auto driver = CreateDriver(config); + auto client = NYdb::NTable::TTableClient(driver); + + TStringStream report; + report << "Results for " << IterationsCount << " iterations" << Endl; + report << "+---------+----------+---------+---------+----------+---------+" << Endl; + report << "| Query # | ColdTime | Min | Max | Mean | Std |" << Endl; + report << "+---------+----------+---------+---------+----------+---------+" << Endl; + + NJson::TJsonValue jsonReport(NJson::JSON_ARRAY); + const bool collectJsonSensors = !JsonReportFileName.empty(); + const TVector<TString> qtokens = GetQueries(); + bool allOkay = true; + + std::map<ui32, TTestInfo> QueryRuns; + for (ui32 queryN = 0; queryN < qtokens.size(); ++queryN) { + if (!NeedRun(queryN)) { + continue; + } + + if (!HasCharsInString(qtokens[queryN])) { + continue; + } + const TString query = PatchQuery(qtokens[queryN]); + + std::vector<TDuration> timings; + timings.reserve(IterationsCount); + + Cout << Sprintf("Query%02u", getQueryNumber(queryN)) << ":" << Endl; + Cerr << "Query text:\n" << Endl; + Cerr << query << Endl << Endl; + + ui32 successIteration = 0; + for (ui32 i = 0; i < IterationsCount * 10 && successIteration < IterationsCount; ++i) { + auto t1 = TInstant::Now(); + auto res = Execute(query, client); + auto duration = TInstant::Now() - t1; + + Cout << "\titeration " << i << ":\t"; + if (res.second == "") { + Cout << "ok\t" << duration << " seconds" << Endl; + timings.emplace_back(duration); + ++successIteration; + if (successIteration == 1) { + outFStream << getQueryNumber(queryN) << ": " << Endl + << res.first << res.second << Endl << Endl; + } + } else { + Cout << "failed\t" << duration << " seconds" << Endl; + Cerr << getQueryNumber(queryN) << ": " << query << Endl + << res.first << res.second << Endl; + Sleep(TDuration::Seconds(1)); + } + } + + if (successIteration != IterationsCount) { + allOkay = false; + } + + auto [inserted, success] = QueryRuns.emplace(queryN, TTestInfo(std::move(timings))); + Y_VERIFY(success); + auto& testInfo = inserted->second; + + report << Sprintf("| %02u | %8.3f | %7.3f | %7.3f | %8.3f | %7.3f |", getQueryNumber(queryN), + testInfo.ColdTime.MilliSeconds() * 0.001, testInfo.Min.MilliSeconds() * 0.001, testInfo.Max.MilliSeconds() * 0.001, + testInfo.Mean * 0.001, testInfo.Std * 0.001) << Endl; + if (collectJsonSensors) { + jsonReport.AppendValue(GetSensorValue("ColdTime", testInfo.ColdTime, queryN)); + jsonReport.AppendValue(GetSensorValue("Min", testInfo.Min, queryN)); + jsonReport.AppendValue(GetSensorValue("Max", testInfo.Max, queryN)); + jsonReport.AppendValue(GetSensorValue("Mean", testInfo.Mean, queryN)); + jsonReport.AppendValue(GetSensorValue("Std", testInfo.Std, queryN)); + } + } + + driver.Stop(true); + + report << "+---------+----------+---------+---------+----------+---------+" << Endl; + + Cout << Endl << report.Str() << Endl; + Cout << "Results saved to " << OutFilePath << Endl; + + if (MiniStatFileName) { + TOFStream jStream{MiniStatFileName}; + + for(ui32 rowId = 0; rowId < IterationsCount; ++rowId) { + ui32 colId = 0; + for(auto [_, testInfo] : QueryRuns) { + if (colId) { + jStream << ","; + } + ++colId; + jStream << testInfo.Timings.at(rowId).MilliSeconds(); + } + + jStream << Endl; + } + jStream.Finish(); + } + + if (collectJsonSensors) { + TOFStream jStream{JsonReportFileName}; + NJson::WriteJson(&jStream, &jsonReport, /*formatOutput*/ true); + jStream.Finish(); + Cout << "Report saved to " << JsonReportFileName << Endl; + } + + return allOkay; +} + + +TString TTpchCommandRun::PatchQuery(const TStringBuf& original) const { + TString result(original.data(), original.size()); + + if (!QuerySettings.empty()) { + result = JoinSeq("\n", QuerySettings) + "\n" + result; + } + + std::vector<TStringBuf> lines; + for(auto& line : StringSplitter(result).Split('\n').SkipEmpty()) { + if (line.StartsWith("--")) { + continue; + } + + lines.push_back(line); + } + + return JoinSeq('\n', lines); +} + + +bool TTpchCommandRun::NeedRun(const ui32 queryIdx) const { + if (QueriesToRun.size() && !QueriesToRun.contains(queryIdx)) { + return false; + } + if (QueriesToSkip.contains(queryIdx)) { + return false; + } + return true; +} + + +TTpchCommandInit::TTpchCommandInit() + : TYdbCommand("init", {"i"}, "Initialize tables") +{} + +void TTpchCommandInit::Config(TConfig& config) { + NYdb::NConsoleClient::TClientCommand::Config(config); + config.SetFreeArgsNum(0); + config.Opts->AddLongOption('p', "path", "Folder name to create tables in") + .Optional() + .DefaultValue("") + .Handler1T<TStringBuf>([this](TStringBuf arg) { + if (arg.StartsWith('/')) { + ythrow NLastGetopt::TUsageException() << "Path must be relative"; + } + TablesPath = arg; + }); + config.Opts->AddLongOption("store", "Storage type." + " Options: row, column\n" + "row - use row-based storage engine;\n" + "column - use column-based storage engine.") + .DefaultValue("row").StoreResult(&StoreType); +}; + +void TTpchCommandInit::SetPartitionByCols(TString& createSql) { + if (StoreType == "column") { + SubstGlobal(createSql, "{partition_customer}", "PARTITION BY HASH(c_custkey)"); + SubstGlobal(createSql, "{partition_lineitem}", "PARTITION BY HASH(l_orderkey)"); + SubstGlobal(createSql, "{partition_nation}", "PARTITION BY HASH(n_nationkey)"); + SubstGlobal(createSql, "{partition_orders}", "PARTITION BY HASH(o_orderkey)"); + SubstGlobal(createSql, "{partition_part}", "PARTITION BY HASH(p_partkey)"); + SubstGlobal(createSql, "{partition_partsupp}", "PARTITION BY HASH(ps_partkey)"); + SubstGlobal(createSql, "{partition_region}", "PARTITION BY HASH(r_regionkey)"); + SubstGlobal(createSql, "{partition_supplier}", "PARTITION BY HASH(s_suppkey)"); + } else { + SubstGlobal(createSql, "{partition_customer}", ""); + SubstGlobal(createSql, "{partition_lineitem}", ""); + SubstGlobal(createSql, "{partition_nation}", ""); + SubstGlobal(createSql, "{partition_orders}", ""); + SubstGlobal(createSql, "{partition_part}", ""); + SubstGlobal(createSql, "{partition_partsupp}", ""); + SubstGlobal(createSql, "{partition_region}", ""); + SubstGlobal(createSql, "{partition_supplier}", ""); + } +} + +int TTpchCommandInit::Run(TConfig& config) { + StoreType = to_lower(StoreType); + TString storageType = ""; + TString notNull = ""; + if (StoreType == "column") { + storageType = "STORE = COLUMN,"; + notNull = "NOT NULL"; + } else if (StoreType != "row") { + throw yexception() << "Incorrect storage type. Available options: \"row\", \"column\"." << Endl; + } + + auto driver = CreateDriver(config); + + TString createSql = NResource::Find("tpch_schema.sql"); + TTableClient client(driver); + + SubstGlobal(createSql, "{notnull}", notNull); + SubstGlobal(createSql, "{path}", TablesPath); + SubstGlobal(createSql, "{store}", storageType); + SetPartitionByCols(createSql); + + ThrowOnError(client.RetryOperationSync([createSql](TSession session) { + return session.ExecuteSchemeQuery(createSql).GetValueSync(); + })); + + Cout << "Tables are created." << Endl; + driver.Stop(true); + return 0; +}; + +TTpchCommandClean::TTpchCommandClean() + : TYdbCommand("clean", {}, "Drop tables") +{} + +void TTpchCommandClean::Config(TConfig& config) { + NYdb::NConsoleClient::TClientCommand::Config(config); + config.SetFreeArgsNum(0); +}; + +int TTpchCommandClean::Run(TConfig& config) { + auto driver = CreateDriver(config); + TTableClient client(driver); + + static const char DropDdlTmpl[] = "DROP TABLE `%s`;"; + char dropDdl[sizeof(DropDdlTmpl) + 8192*3]; // 32*256 for DbPath + for (auto& table : Tables) { + TString fullPath = FullTablePath(config.Database, table); + int res = std::sprintf(dropDdl, DropDdlTmpl, fullPath.c_str()); + if (res < 0) { + Cerr << "Failed to generate DROP DDL query for `" << fullPath << "` table." << Endl; + return -1; + } + + ThrowOnError(client.RetryOperationSync([dropDdl](TSession session) { + return session.ExecuteSchemeQuery(dropDdl).GetValueSync(); + })); + } + + Cout << "Clean succeeded." << Endl; + driver.Stop(true); + return 0; +}; + + +TTpchCommandRun::TTpchCommandRun() + : TYdbCommand("run", {"b"}, "Perform benchmark") +{} + +void TTpchCommandRun::Config(TConfig& config) { + TClientCommand::Config(config); + config.SetFreeArgsNum(0); + config.Opts->AddLongOption("output", "Save queries output to file") + .Optional() + .RequiredArgument("FILE") + .DefaultValue("results.out") + .StoreResult(&OutFilePath); + config.Opts->AddLongOption("iterations", "Iterations count") + .DefaultValue(1) + .StoreResult(&IterationsCount); + config.Opts->AddLongOption("json", "Json report file name") + .DefaultValue("") + .StoreResult(&JsonReportFileName); + config.Opts->AddLongOption("ministat", "Ministat report file name") + .DefaultValue("") + .StoreResult(&MiniStatFileName); + config.Opts->AddLongOption("query-settings", "Query settings.") + .DefaultValue("") + .AppendTo(&QuerySettings); + config.Opts->AddLongOption("ext-queries-dir", "Directory with external queries. Naming have to be q[0-N].sql") + .StoreResult(&ExternalQueriesDir); + + auto fillTestCases = [](TStringBuf line, std::function<void(ui32)>&& op) { + for (const auto& token : StringSplitter(line).Split(',').SkipEmpty()) { + TStringBuf part = token.Token(); + TStringBuf from, to; + if (part.TrySplit('-', from, to)) { + ui32 begin = FromString(from); + ui32 end = FromString(to); + while (begin <= end) { + op(begin); + ++begin; + } + } else { + op(FromString<ui32>(part)); + } + } + }; + + auto includeOpt = config.Opts->AddLongOption("include", + "Run only specified queries (ex.: 1,2,3,5-10,20)") + .Optional() + .Handler1T<TStringBuf>([this, fillTestCases](TStringBuf line) { + QueriesToRun.clear(); + fillTestCases(line, [this](ui32 q) { + QueriesToRun.insert(q-1); + }); + }); + auto excludeOpt = config.Opts->AddLongOption("exclude", + "Run all queries except given ones (ex.: 0,1,2,3,5-10,20)") + .Optional() + .Handler1T<TStringBuf>([this, fillTestCases](TStringBuf line) { + fillTestCases(line, [this](ui32 q) { + QueriesToSkip.emplace(q); + }); + }); + + config.Opts->MutuallyExclusiveOpt(includeOpt, excludeOpt); +}; + + +int TTpchCommandRun::Run(TConfig& config) { + const bool okay = RunBench(config); + return !okay; +}; + +TCommandTpch::TCommandTpch() + : TClientCommandTree("tpch", {}, "TPC-H workload") +{ + AddCommand(std::make_unique<TTpchCommandRun>()); + AddCommand(std::make_unique<TTpchCommandInit>()); + AddCommand(std::make_unique<TTpchCommandClean>()); +} + +} // namespace NYdb::NConsoleClient diff --git a/ydb/public/lib/ydb_cli/commands/tpch.h b/ydb/public/lib/ydb_cli/commands/tpch.h new file mode 100644 index 00000000000..dd4a4264fb2 --- /dev/null +++ b/ydb/public/lib/ydb_cli/commands/tpch.h @@ -0,0 +1,64 @@ +#pragma once + +#include <util/generic/set.h> + +#include "ydb_command.h" + +namespace NYdb::NConsoleClient { + +class TTpchCommandInit : public NYdb::NConsoleClient::TYdbCommand { +public: + TTpchCommandInit(); + void Config(TConfig& config); + int Run(TConfig& config); + +private: + void SetPartitionByCols(TString& createSql); + + TString TablesPath; + TString StoreType; +}; + +class TTpchCommandClean : public NYdb::NConsoleClient::TYdbCommand { +public: + TTpchCommandClean(); + void Config(TConfig& config); + int Run(TConfig& config); + +private: + std::vector<TString> Tables = {"customer", "lineitem", "nation", "orders", + "region", "part", "partsupp", "supplier"}; +}; + +class TTpchCommandRun : public NYdb::NConsoleClient::TYdbCommand { +protected: + TSet<ui32> QueriesToRun; + TSet<ui32> QueriesToSkip; + TVector<TString> QuerySettings; + TString ExternalQueries; + TString ExternalQueriesFile; + TString ExternalQueriesDir; + TString ExternalVariablesString; +public: + TTpchCommandRun(); + void Config(TConfig& config); + int Run(TConfig& config); + TString PatchQuery(const TStringBuf& original) const; + bool NeedRun(const ui32 queryIdx) const; + bool RunBench(TConfig& config); + + TVector<TString> GetQueries() const; + + TString OutFilePath; + ui32 IterationsCount; + TString JsonReportFileName; + TString MiniStatFileName; + TString Table; +}; + +class TCommandTpch : public NYdb::NConsoleClient::TClientCommandTree { +public: + TCommandTpch(); +}; + +} // namespace NYdb::NConsoleClient diff --git a/ydb/public/lib/ydb_cli/commands/tpch_schema.sql b/ydb/public/lib/ydb_cli/commands/tpch_schema.sql new file mode 100644 index 00000000000..040749448b2 --- /dev/null +++ b/ydb/public/lib/ydb_cli/commands/tpch_schema.sql @@ -0,0 +1,124 @@ +CREATE TABLE `{path}customer` ( + c_acctbal Double {notnull}, -- it should be Decimal(12, 2) + c_address String {notnull}, + c_comment String {notnull}, + c_custkey Int32 {notnull}, -- Identifier + c_mktsegment String {notnull}, + c_name String {notnull}, + c_nationkey Int32 {notnull}, -- FK to N_NATIONKEY + c_phone String {notnull}, + PRIMARY KEY (c_custkey) +) +{partition_customer} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 32) +; + +CREATE TABLE `{path}lineitem` ( + l_comment String {notnull}, + l_commitdate Date {notnull}, + l_discount Double {notnull}, -- it should be Decimal(12, 2) + l_extendedprice Double {notnull}, -- it should be Decimal(12, 2) + l_linenumber Int32 {notnull}, + l_linestatus String {notnull}, + l_orderkey Int32 {notnull}, -- FK to O_ORDERKEY + l_partkey Int32 {notnull}, -- FK to P_PARTKEY, first part of the compound FK to (PS_PARTKEY, PS_SUPPKEY) with L_SUPPKEY + l_quantity Double {notnull}, -- it should be Decimal(12, 2) + l_receiptdate Date {notnull}, + l_returnflag String {notnull}, + l_shipdate Date {notnull}, + l_shipinstruct String {notnull}, + l_shipmode String {notnull}, + l_suppkey Int32 {notnull}, -- FK to S_SUPPKEY, second part of the compound FK to (PS_PARTKEY, PS_SUPPKEY) with L_PARTKEY + l_tax Double {notnull}, -- it should be Decimal(12, 2) + PRIMARY KEY (l_orderkey, l_linenumber) +) +{partition_lineitem} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 32) +; + +CREATE TABLE `{path}nation` ( + n_comment String {notnull}, + n_name String {notnull}, + n_nationkey Int32 {notnull}, -- Identifier + n_regionkey Int32 {notnull}, -- FK to R_REGIONKEY + PRIMARY KEY(n_nationkey) +) +{partition_nation} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1) +; + +CREATE TABLE `{path}orders` ( + o_clerk String {notnull}, + o_comment String {notnull}, + o_custkey Int32 {notnull}, -- FK to C_CUSTKEY + o_orderdate Date {notnull}, + o_orderkey Int32 {notnull}, -- Identifier + o_orderpriority String {notnull}, + o_orderstatus String {notnull}, + o_shippriority Int32 {notnull}, + o_totalprice Double {notnull}, -- it should be Decimal(12, 2) + PRIMARY KEY (o_orderkey) +) +{partition_orders} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 32) +; + +CREATE TABLE `{path}part` ( + p_brand String {notnull}, + p_comment String {notnull}, + p_container String {notnull}, + p_mfgr String {notnull}, + p_name String {notnull}, + p_partkey Int32 {notnull}, -- Identifier + p_retailprice Double {notnull}, -- it should be Decimal(12, 2) + p_size Int32 {notnull}, + p_type String {notnull}, + PRIMARY KEY(p_partkey) +) +{partition_part} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 32) +; + +CREATE TABLE `{path}partsupp` ( + ps_availqty Int32 {notnull}, + ps_comment String {notnull}, + ps_partkey Int32 {notnull}, -- FK to P_PARTKEY + ps_suppkey Int32 {notnull}, -- FK to S_SUPPKEY + ps_supplycost Double {notnull}, -- it should be Decimal(12, 2) + PRIMARY KEY(ps_partkey, ps_suppkey) +) +{partition_partsupp} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 32) +; + +CREATE TABLE `{path}region` ( + r_comment String {notnull}, + r_name String {notnull}, + r_regionkey Int32 {notnull}, -- Identifier + PRIMARY KEY(r_regionkey) +) +{partition_region} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1) +; + +CREATE TABLE `{path}supplier` ( + s_acctbal Double {notnull}, -- it should be Decimal(12, 2) + s_address String {notnull}, + s_comment String {notnull}, + s_name String {notnull}, + s_nationkey Int32 {notnull}, -- FK to N_NATIONKEY + s_phone String {notnull}, + s_suppkey Int32 {notnull}, -- Identifier + PRIMARY KEY(s_suppkey) +) +{partition_supplier} +WITH ({store} +AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 32) +; diff --git a/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp b/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp index 549a14ad3a3..637c5fb1b63 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp +++ b/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp @@ -3,6 +3,7 @@ #include "stock_workload.h" #include "kv_workload.h" #include "click_bench.h" +#include "tpch.h" #include "topic_workload/topic_workload.h" #include <ydb/library/workload/workload_factory.h> @@ -40,6 +41,7 @@ TCommandWorkload::TCommandWorkload() AddCommand(std::make_unique<TCommandKv>()); AddCommand(std::make_unique<TCommandClickBench>()); AddCommand(std::make_unique<TCommandWorkloadTopic>()); + AddCommand(std::make_unique<TCommandTpch>()); } TWorkloadCommand::TWorkloadCommand(const TString& name, const std::initializer_list<TString>& aliases, const TString& description) |