From cb56e1cde2824ff3b64be1de4794bff3cab0db61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9E=D0=BB=D0=B5=D0=B3?=
 <150132506+iddqdex@users.noreply.github.com>
Date: Wed, 12 Mar 2025 13:26:18 +0300
Subject: fix race in clickbench generator (#15580)

---
 ydb/library/workload/clickbench/data_generator.cpp | 27 ++++++++++++----------
 ydb/library/workload/clickbench/data_generator.h   |  1 +
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/ydb/library/workload/clickbench/data_generator.cpp b/ydb/library/workload/clickbench/data_generator.cpp
index 5ef975d968..65f756deeb 100644
--- a/ydb/library/workload/clickbench/data_generator.cpp
+++ b/ydb/library/workload/clickbench/data_generator.cpp
@@ -50,10 +50,9 @@ TClickbenchWorkloadDataInitializerGenerator::TDataGenerartor::TDataGenerartor(co
 
 IBulkDataGenerator::TDataPortions TClickbenchWorkloadDataInitializerGenerator::TDataGenerartor::GenerateDataPortion() {
     while (true) {
-        size_t index;
         TFile::TPtr file;
         with_lock(Lock) {
-            if (Files.empty()) {
+            if (!FilesCount) {
                 return {};
             }
             if (FirstPortion) {
@@ -72,19 +71,22 @@ IBulkDataGenerator::TDataPortions TClickbenchWorkloadDataInitializerGenerator::T
                     )};
                 }
             }
-            index = std::hash<std::thread::id>{}(std::this_thread::get_id()) % Files.size();
-            file = Files[index];
+            if (!Files.empty()) {
+                file = Files.back();
+                Files.pop_back();
+            }
+        }
+        if (!file) {
+            Sleep(TDuration::MilliSeconds(100));
+            continue;
         }
         if (auto result = file->GetPortion()) {
+            auto g = Guard(Lock);
+            Files.push_back(file);
             return {result};
-        }
-        with_lock(Lock) {
-            if (index < Files.size() && file == Files[index]) {
-                if (index + 1 != Files.size()) {
-                    Files[index].Swap(Files.back());
-                }
-                Files.pop_back();
-            }
+        } else {
+            auto g = Guard(Lock);
+            --FilesCount;
         }
     }
 }
@@ -179,6 +181,7 @@ void TClickbenchWorkloadDataInitializerGenerator::TDataGenerartor::AddFile(const
     } else if (name.EndsWith(".csv") || name.EndsWith(".csv.gz")) {
         Files.push_back(MakeIntrusive<TCsvFile>(*this, path));
     }
+    ++FilesCount;
 }
 
 }
\ No newline at end of file
diff --git a/ydb/library/workload/clickbench/data_generator.h b/ydb/library/workload/clickbench/data_generator.h
index de6b9eede5..6aeb81eaf3 100644
--- a/ydb/library/workload/clickbench/data_generator.h
+++ b/ydb/library/workload/clickbench/data_generator.h
@@ -40,6 +40,7 @@ private:
     private:
         const TClickbenchWorkloadDataInitializerGenerator& Owner;
         TVector<TFile::TPtr> Files;
+        ui32 FilesCount = 0;
         TAdaptiveLock Lock;
         bool FirstPortion = true;
         static constexpr ui64 DataSetSize = 99997497;
-- 
cgit v1.2.3