From cb56e1cde2824ff3b64be1de4794bff3cab0db61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9E=D0=BB=D0=B5=D0=B3?= <150132506+iddqdex@users.noreply.github.com> Date: Wed, 12 Mar 2025 13:26:18 +0300 Subject: fix race in clickbench generator (#15580) --- ydb/library/workload/clickbench/data_generator.cpp | 27 ++++++++++++---------- ydb/library/workload/clickbench/data_generator.h | 1 + 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/ydb/library/workload/clickbench/data_generator.cpp b/ydb/library/workload/clickbench/data_generator.cpp index 5ef975d968..65f756deeb 100644 --- a/ydb/library/workload/clickbench/data_generator.cpp +++ b/ydb/library/workload/clickbench/data_generator.cpp @@ -50,10 +50,9 @@ TClickbenchWorkloadDataInitializerGenerator::TDataGenerartor::TDataGenerartor(co IBulkDataGenerator::TDataPortions TClickbenchWorkloadDataInitializerGenerator::TDataGenerartor::GenerateDataPortion() { while (true) { - size_t index; TFile::TPtr file; with_lock(Lock) { - if (Files.empty()) { + if (!FilesCount) { return {}; } if (FirstPortion) { @@ -72,19 +71,22 @@ IBulkDataGenerator::TDataPortions TClickbenchWorkloadDataInitializerGenerator::T )}; } } - index = std::hash<std::thread::id>{}(std::this_thread::get_id()) % Files.size(); - file = Files[index]; + if (!Files.empty()) { + file = Files.back(); + Files.pop_back(); + } + } + if (!file) { + Sleep(TDuration::MilliSeconds(100)); + continue; } if (auto result = file->GetPortion()) { + auto g = Guard(Lock); + Files.push_back(file); return {result}; - } - with_lock(Lock) { - if (index < Files.size() && file == Files[index]) { - if (index + 1 != Files.size()) { - Files[index].Swap(Files.back()); - } - Files.pop_back(); - } + } else { + auto g = Guard(Lock); + --FilesCount; } } } @@ -179,6 +181,7 @@ void TClickbenchWorkloadDataInitializerGenerator::TDataGenerartor::AddFile(const } else if (name.EndsWith(".csv") || name.EndsWith(".csv.gz")) { Files.push_back(MakeIntrusive<TCsvFile>(*this, path)); } + ++FilesCount; } } \ No newline at end of file diff --git a/ydb/library/workload/clickbench/data_generator.h b/ydb/library/workload/clickbench/data_generator.h index de6b9eede5..6aeb81eaf3 100644 --- a/ydb/library/workload/clickbench/data_generator.h +++ b/ydb/library/workload/clickbench/data_generator.h @@ -40,6 +40,7 @@ private: private: const TClickbenchWorkloadDataInitializerGenerator& Owner; TVector<TFile::TPtr> Files; + ui32 FilesCount = 0; TAdaptiveLock Lock; bool FirstPortion = true; static constexpr ui64 DataSetSize = 99997497; -- cgit v1.2.3