aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorОлег <150132506+iddqdex@users.noreply.github.com>2025-05-30 17:53:45 +0300
committerGitHub <noreply@github.com>2025-05-30 14:53:45 +0000
commita310fbc63e16e7c67950de8c286a896472ed48f1 (patch)
treef99b35c704e2f5113f83b615241f0e3ef6b3b59a
parent56dd77af780ca8b0eba63484e4c791d6a07110dc (diff)
downloadydb-a310fbc63e16e7c67950de8c286a896472ed48f1.tar.gz
Make `ydb workload clean` more safe (#19033)
-rw-r--r--ydb/apps/ydb/CHANGELOG.md1
-rw-r--r--ydb/library/workload/benchmark_base/workload.cpp12
-rw-r--r--ydb/public/lib/ydb_cli/commands/ydb_workload.cpp15
-rw-r--r--ydb/public/lib/ydb_cli/commands/ydb_workload.h3
-rw-r--r--ydb/public/lib/ydb_cli/common/recursive_remove.cpp2
-rw-r--r--ydb/tests/functional/tpc/medium/test_clean.py77
-rw-r--r--ydb/tests/functional/tpc/medium/ya.make1
7 files changed, 108 insertions, 3 deletions
diff --git a/ydb/apps/ydb/CHANGELOG.md b/ydb/apps/ydb/CHANGELOG.md
index 68e5b89db36..3784dd1684b 100644
--- a/ydb/apps/ydb/CHANGELOG.md
+++ b/ydb/apps/ydb/CHANGELOG.md
@@ -1,3 +1,4 @@
+* Fixed an issue where the `ydb workload * clean` commands were deleting all contents from the target directory, instead of just the tables created by the init command.
* Switched highlighting engine
* Added `ydb admin cluster config version` command to show configuration version (V1/V2) on nodes.
* Removed `--executor` option from `ydb workload run` commands. Use always `generic`.
diff --git a/ydb/library/workload/benchmark_base/workload.cpp b/ydb/library/workload/benchmark_base/workload.cpp
index f68a16761d4..124f293ac03 100644
--- a/ydb/library/workload/benchmark_base/workload.cpp
+++ b/ydb/library/workload/benchmark_base/workload.cpp
@@ -59,7 +59,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
specialTypes["timestamp_type"] = Params.GetTimestampType();
const auto& tableName = table["name"].GetString();
- const auto path = Params.GetFullTableName(single ? nullptr : tableName.c_str());
+ const auto path = Params.GetFullTableName((single && Params.GetPath())? nullptr : tableName.c_str());
result << Endl << "CREATE ";
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
result << "EXTERNAL ";
@@ -153,7 +153,15 @@ NJson::TJsonValue TWorkloadGeneratorBase::GetTablesJson() const {
}
+// Returns the list of scheme paths that the clean step should remove.
+// The list is built from the tables JSON (GetTablesJson()) instead of
+// returning the whole Params.GetPath() directory.
TVector<std::string> TWorkloadGeneratorBase::GetCleanPaths() const {
- return { Params.GetPath().c_str() };
+ const auto json = GetTablesJson();
+ TVector<std::string> result;
+ // One entry per element of the "tables" array: <path> + "/" + <table name>.
+ // NOTE(review): when Params.GetPath() is empty this produces a path with a
+ // leading '/' - confirm the caller tolerates it.
+ for (const auto& table: json["tables"].GetArray()) {
+ result.emplace_back(Params.GetPath() + "/" + table["name"].GetString());
+ }
+ // A single "table" entry is addressed by the configured path itself when it
+ // is non-empty, otherwise by the table's own name.
+ if (json.Has("table")) {
+ result.emplace_back(Params.GetPath() ? Params.GetPath() : json["table"]["name"].GetString());
+ }
+ return result;
}
TWorkloadDataInitializerBase::TWorkloadDataInitializerBase(const TString& name, const TString& description, const TWorkloadBaseParams& params)
diff --git a/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp b/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
index fe3161d3223..a76b7afb7cb 100644
--- a/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
+++ b/ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
@@ -413,11 +413,26 @@ void TWorkloadCommandBase::CleanTables(NYdbWorkload::IWorkloadQueryGenerator& wo
Cout << "Remove " << fullPath << Endl;
} else {
NStatusHelpers::ThrowOnErrorOrPrintIssues(RemovePathRecursive(*Driver.Get(), fullPath, settings));
+ RmParentIfEmpty(path, config);
}
Cout << "Remove path " << path << "...Ok" << Endl;
}
}
+// Walks up from `path` one component at a time and removes each ancestor
+// directory (resolved relative to config.Database) that lists as empty.
+// `path` is taken by value, so the caller's view is not modified.
+// A failed RemoveDirectory goes through ThrowOnErrorOrPrintIssues.
+void TWorkloadCommandBase::RmParentIfEmpty(TStringBuf path, TConfig& config) {
+ // Strip the last '/'-separated component so that `path` names the parent
+ // (presumably RNextTok leaves `path` empty when no '/' remains - TODO confirm).
+ path.RNextTok('/');
+ // Nothing left to inspect: end of the recursion.
+ if (!path) {
+ return;
+ }
+ auto fullPath = std::string(config.Database.c_str()) + "/" + std::string(path.cbegin(), path.cend());
+ auto lsResult = SchemeClient->ListDirectory(fullPath).GetValueSync();
+ // Remove only if the listing succeeded, the entry is a directory and it has
+ // no children; a failed listing is silently skipped.
+ if (lsResult.IsSuccess() && lsResult.GetChildren().empty() && lsResult.GetEntry().Type == NScheme::ESchemeEntryType::Directory) {
+ Cout << "Folder " << path << " is empty, remove it..." << Endl;
+ NStatusHelpers::ThrowOnErrorOrPrintIssues(SchemeClient->RemoveDirectory(fullPath).GetValueSync());
+ }
+ // Continue with the next ancestor whether or not this one was removed.
+ RmParentIfEmpty(path, config);
+}
+
std::unique_ptr<TClientCommand> TWorkloadCommandRoot::CreateRunCommand(const NYdbWorkload::IWorkloadQueryGenerator::TWorkloadType& workload) {
switch (workload.Kind) {
case NYdbWorkload::IWorkloadQueryGenerator::TWorkloadType::EKind::Workload:
diff --git a/ydb/public/lib/ydb_cli/commands/ydb_workload.h b/ydb/public/lib/ydb_cli/commands/ydb_workload.h
index 1b459e71b42..271fce87676 100644
--- a/ydb/public/lib/ydb_cli/commands/ydb_workload.h
+++ b/ydb/public/lib/ydb_cli/commands/ydb_workload.h
@@ -104,6 +104,9 @@ protected:
THolder<NQuery::TQueryClient> QueryClient;
int Type = 0;
bool DryRun = false;
+
+private:
+ void RmParentIfEmpty(TStringBuf path, TConfig& config);
};
class TWorkloadCommandInit final: public TWorkloadCommandBase {
diff --git a/ydb/public/lib/ydb_cli/common/recursive_remove.cpp b/ydb/public/lib/ydb_cli/common/recursive_remove.cpp
index 37cc374b5a7..e73bc939cbd 100644
--- a/ydb/public/lib/ydb_cli/common/recursive_remove.cpp
+++ b/ydb/public/lib/ydb_cli/common/recursive_remove.cpp
@@ -261,7 +261,7 @@ TStatus RemovePathRecursive(
case NYdb::NScheme::ESchemeEntryType::Directory:
return RemoveDirectoryRecursive(driver, path, settings);
default:
- return RemovePathRecursive(driver, entity.GetEntry(), settings);
+ return RemovePathRecursive(driver, entry, settings);
}
}
diff --git a/ydb/tests/functional/tpc/medium/test_clean.py b/ydb/tests/functional/tpc/medium/test_clean.py
new file mode 100644
index 00000000000..5604a1ef15d
--- /dev/null
+++ b/ydb/tests/functional/tpc/medium/test_clean.py
@@ -0,0 +1,77 @@
+from ydb.tests.functional.tpc.lib.conftest import FunctionalTestBase
+from ydb.tests.olap.lib.ydb_cluster import YdbCluster
+
+
+# Functional test for `workload ... clean`: initializes several workloads with
+# and without a custom `-p` path, then cleans them one by one and checks that
+# each clean removes only that workload's own objects.
+class TestClean(FunctionalTestBase):
+ def test(self):
+ # True if describe_path succeeds for the path under the test database.
+ # Any exception (BaseException) is treated as "does not exist".
+ def _exists(path: str) -> bool:
+ try:
+ YdbCluster.get_ydb_driver().scheme_client.describe_path(f'/{YdbCluster.ydb_database}/{path}')
+ except BaseException:
+ return False
+ return True
+
+ # Phase 1: init every workload and check that its objects appeared.
+ self.run_cli(['workload', 'tpch', '-p', 'custom/tpch/s1', 'init', '--store=column'])
+ assert _exists('custom/tpch/s1/orders')
+
+ self.run_cli(['workload', 'tpcds', '-p', 'custom/tpcds/s1', 'init', '--store=column'])
+ assert _exists('custom/tpcds/s1/call_center')
+
+ self.run_cli(['workload', 'tpcds', 'init', '--store=column'])
+ assert _exists('call_center')
+
+ self.run_cli(['workload', 'clickbench', '-p', 'custom/clickbench/hits', 'init', '--store=column'])
+ assert _exists('custom/clickbench/hits')
+
+ self.run_cli(['workload', 'clickbench', 'init', '--store=column'])
+ assert _exists('clickbench/hits')
+
+ self.run_cli(['workload', 'stock', 'init', '--store=column', '-o', '0'])
+ assert _exists('orderLines')
+ assert _exists('orders')
+ assert _exists('stock')
+
+ self.run_cli(['workload', 'kv', 'init', '--init-upserts=0', '--store=column'])
+ assert _exists('kv_test')
+
+ self.run_cli(['workload', 'kv', '-p', 'custom/kv/table', 'init', '--init-upserts=0', '--store=column'])
+ assert _exists('custom/kv/table')
+
+ # Phase 2: clean the workloads created without `-p`; `custom` must survive
+ # each of these cleans.
+ self.run_cli(['workload', 'kv', 'clean'])
+ assert not _exists('kv_test')
+ assert _exists('custom')
+
+ self.run_cli(['workload', 'stock', 'clean'])
+ assert not _exists('orderLines')
+ assert not _exists('orders')
+ assert not _exists('stock')
+ assert _exists('custom')
+
+ self.run_cli(['workload', 'clickbench', 'clean'])
+ assert not _exists('clickbench')
+ assert _exists('custom')
+
+ self.run_cli(['workload', 'tpcds', 'clean'])
+ assert not _exists('call_center')
+ assert _exists('custom')
+
+ # Phase 3: clean the workloads under `custom/...`; each clean is expected
+ # to remove its own subfolder while `custom` stays until the last one.
+ self.run_cli(['workload', 'kv', '-p', 'custom/kv/table', 'clean'])
+ assert not _exists('custom/kv')
+ assert _exists('custom')
+
+ self.run_cli(['workload', 'clickbench', '-p', 'custom/clickbench/hits', 'clean'])
+ assert not _exists('custom/clickbench')
+ assert _exists('custom')
+
+ self.run_cli(['workload', 'tpcds', '-p', 'custom/tpcds/s1', 'clean'])
+ assert not _exists('custom/tpcds')
+ assert _exists('custom')
+
+ # Last workload under `custom`: now `custom` itself must be gone.
+ self.run_cli(['workload', 'tpch', '-p', 'custom/tpch/s1', 'clean'])
+ assert not _exists('custom')
+ # The database root must have no children left, ignoring names that start
+ # with '.' (presumably system entries - TODO confirm).
+ children = list(filter(lambda x: not x.startswith('.'), [e.name for e in YdbCluster.get_ydb_driver().scheme_client.list_directory(f'/{YdbCluster.ydb_database}').children]))
+ assert len(children) == 0
+
+ # Brings up the cluster once for the class via setup_cluster().
+ @classmethod
+ def setup_class(cls) -> None:
+ cls.setup_cluster()
diff --git a/ydb/tests/functional/tpc/medium/ya.make b/ydb/tests/functional/tpc/medium/ya.make
index d7f5e322ffa..554c417d494 100644
--- a/ydb/tests/functional/tpc/medium/ya.make
+++ b/ydb/tests/functional/tpc/medium/ya.make
@@ -2,6 +2,7 @@ PY3TEST()
ENV(YDB_HARD_MEMORY_LIMIT_BYTES="107374182400")
TEST_SRCS(
+ test_clean.py
test_clickbench.py
test_workload_simple_queue.py
# test_external.py