about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author auzhegov <auzhegov@yandex-team.com> 2023-02-16 12:28:29 +0300
committer auzhegov <auzhegov@yandex-team.com> 2023-02-16 12:28:29 +0300
commit a550b15e307f7b41ef2cca24df4a4fd80efdbbc6 (patch)
tree 51afb79bc436128ccfae2886e896eedd588371d2
parent 463633625b0cd4f6dac71bb122e40d0c2bd856d7 (diff)
download ydb-a550b15e307f7b41ef2cca24df4a4fd80efdbbc6.tar.gz
[ListingLargeDirectories] Change message format
Initial implementation
-rw-r--r-- ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp 13
-rw-r--r-- ydb/library/yql/providers/s3/proto/range.proto 1
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp 2
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp 18
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp 6
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp 8
-rw-r--r-- ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp 19
-rw-r--r-- ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h 18
-rw-r--r-- ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp 93
-rw-r--r-- ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp 15
-rw-r--r-- ydb/library/yql/providers/s3/range_helpers/path_list_reader.h 11
-rw-r--r-- ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp 40
12 files changed, 153 insertions, 91 deletions
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
index 81ba304ce2..d29c26cd8d 100644
--- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
+++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
@@ -310,11 +310,11 @@ public:
void StartDownload(size_t index) {
DownloadInflight++;
const TPath& path = Paths[index];
- auto url = Url + std::get<TString>(path);
+ auto url = Url + path.Path;
auto id = index + StartPathIndex;
const TString requestId = CreateGuidAsString();
LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]");
- Gateway->Download(url, MakeHeaders(Token, requestId), 0U, std::min(std::get<size_t>(path), SizeLimit),
+ Gateway->Download(url, MakeHeaders(Token, requestId), 0U, std::min(path.Size, SizeLimit),
std::bind(&TS3ReadActor::OnDownloadFinished, ActorSystem, SelfId(), requestId, std::placeholders::_1, id), {}, RetryPolicy);
}
@@ -392,7 +392,7 @@ private:
void Handle(TEvPrivate::TEvReadResult::TPtr& result) {
++IsDoneCounter;
const auto id = result->Get()->PathIndex;
- const auto path = std::get<TString>(Paths[id - StartPathIndex]);
+ const auto path = Paths[id - StartPathIndex].Path;
const auto httpCode = result->Get()->Result.HttpResponseCode;
const auto requestId = result->Get()->RequestId;
IngressBytes += result->Get()->Result.size();
@@ -427,7 +427,7 @@ private:
++IsDoneCounter;
auto id = result->Get()->PathIndex;
const auto requestId = result->Get()->RequestId;
- const auto path = std::get<TString>(Paths[id - StartPathIndex]);
+ const auto path = Paths[id - StartPathIndex].Path;
LOG_W("TS3ReadActor", "Error while reading file " << path << ", details: ID: " << id << ", TEvReadError: " << result->Get()->Error.ToOneLineString() << ", request id: [" << requestId << "]");
auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while reading file " << path << " with request id [" << requestId << "]", TIssues{result->Get()->Error});
Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR));
@@ -1266,15 +1266,14 @@ public:
DownloadInflight++;
const TPath& path = Paths[index];
const TString requestId = CreateGuidAsString();
- ui64 fileSize = std::get<std::size_t>(path);
- auto stuff = std::make_shared<TRetryStuff>(Gateway, Url + std::get<TString>(path), MakeHeaders(Token, requestId), fileSize, TxId, requestId, RetryPolicy);
+ auto stuff = std::make_shared<TRetryStuff>(Gateway, Url + path.Path, MakeHeaders(Token, requestId), path.Size, TxId, requestId, RetryPolicy);
auto pathIndex = index + StartPathIndex;
RetryStuffForFile.emplace(pathIndex, stuff);
if (TaskCounters) {
HttpInflightLimit->Add(Gateway->GetBuffersSizePerStream());
}
::NMonitoring::TDynamicCounters::TCounterPtr inflightCounter;
- auto impl = MakeHolder<TS3ReadCoroImpl>(InputIndex, TxId, ComputeActorId, stuff, ReadSpec, pathIndex, std::get<TString>(path), Url, MaxBlocksInFly, ArrowReader, ReadActorFactoryCfg, DeferredQueueSize, HttpInflightSize, HttpDataRps);
+ auto impl = MakeHolder<TS3ReadCoroImpl>(InputIndex, TxId, ComputeActorId, stuff, ReadSpec, pathIndex, path.Path, Url, MaxBlocksInFly, ArrowReader, ReadActorFactoryCfg, DeferredQueueSize, HttpInflightSize, HttpDataRps);
CoroActors.insert(RegisterWithSameMailbox(std::make_unique<TS3ReadCoroActor>(std::move(impl), std::move(stuff), pathIndex, impl->IsDownloadNeeded(), impl->HttpInflightSize).release()));
}
diff --git a/ydb/library/yql/providers/s3/proto/range.proto b/ydb/library/yql/providers/s3/proto/range.proto
index 975c0d559e..9e3ef2c58e 100644
--- a/ydb/library/yql/providers/s3/proto/range.proto
+++ b/ydb/library/yql/providers/s3/proto/range.proto
@@ -13,5 +13,6 @@ message TRange {
repeated TPath Children = 2;
uint64 Size = 3;
bool Read = 4; // Read this path
+ bool IsDirectory = 5;
}
}
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
index 2b71b9d0cd..cb708b15fe 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
@@ -22,7 +22,7 @@ bool ValidateS3PackedPaths(TPositionHandle pos, TStringBuf blob, bool isTextEnco
TPathList paths;
UnpackPathsList(blob, isTextEncoded, paths);
for (size_t i = 0; i < paths.size(); ++i) {
- if (std::get<0>(paths[i]).empty()) {
+ if (paths[i].Path.empty()) {
ctx.AddError(TIssue(ctx.GetPosition(pos), TStringBuilder() << "Expected non-empty path (index " << i << ")"));
return false;
}
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp
index 56919944b9..28cabdb079 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp
@@ -62,17 +62,20 @@ public:
ui64 Partition(const TDqSettings&, size_t maxPartitions, const TExprNode& node, TVector<TString>& partitions, TString*, TExprContext&, bool) override {
TString cluster;
- std::vector<std::vector<std::pair<TString, ui64>>> parts;
+ std::vector<std::vector<TPath>> parts;
if (const TMaybeNode<TDqSource> source = &node) {
cluster = source.Cast().DataSource().Cast<TS3DataSource>().Cluster().Value();
const auto settings = source.Cast().Settings().Cast<TS3SourceSettingsBase>();
for (auto i = 0u; i < settings.Paths().Size(); ++i) {
- const auto& path = settings.Paths().Item(i);
+ const auto& packed = settings.Paths().Item(i);
TPathList paths;
- UnpackPathsList(path.Data().Literal().Value(), FromString<bool>(path.IsText().Literal().Value()), paths);
+ UnpackPathsList(
+ packed.Data().Literal().Value(),
+ FromString<bool>(packed.IsText().Literal().Value()),
+ paths);
parts.reserve(parts.size() + paths.size());
- for (auto& p : paths) {
- parts.emplace_back(1U, std::pair(std::get<0>(p), std::get<1>(p)));
+ for (const auto& path : paths) {
+ parts.emplace_back(1U, path);
}
}
}
@@ -104,7 +107,10 @@ public:
NS3::TRange range;
range.SetStartPathIndex(startIdx);
TFileTreeBuilder builder;
- std::for_each(part.cbegin(), part.cend(), [&builder, &startIdx](const std::pair<TString, ui64>& f) { builder.AddPath(f.first, f.second); ++startIdx; });
+ std::for_each(part.cbegin(), part.cend(), [&builder, &startIdx](const TPath& f) {
+ builder.AddPath(f.Path, f.Size, f.IsDirectory);
+ ++startIdx;
+ });
builder.Save(&range);
partitions.emplace_back();
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
index 44d463de94..976f3e3ff2 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
@@ -177,7 +177,7 @@ private:
const auto& listEntries = std::get<IS3Lister::TListEntries>(listResult);
for (auto& entry : listEntries) {
- listedPaths.emplace_back(entry.Path, entry.Size);
+ listedPaths.emplace_back(entry.Path, entry.Size, false);
}
}
@@ -340,7 +340,7 @@ private:
}
auto& pathList = pathsByExtraValues[extraValues];
- pathList.emplace_back(entry.Path, entry.Size);
+ pathList.emplace_back(entry.Path, entry.Size, false);
readSize += entry.Size;
}
@@ -512,7 +512,7 @@ private:
req.Token = tokenStr;
req.Url = url;
for (const auto& directory : directories) {
- req.Pattern = NS3::NormalizePath(TStringBuilder() << std::get<0>(directory) << "/" << effectiveFilePattern);
+ req.Pattern = NS3::NormalizePath(TStringBuilder() << directory.Path << "/" << effectiveFilePattern);
RequestsByNode_[source.Raw()].push_back(req);
if (PendingRequests_.find(req) == PendingRequests_.end()) {
auto future = Lister_->List(req.Token, req.Url, req.Pattern);
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp
index 551c0e1d80..e62c23fab9 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp
@@ -241,15 +241,13 @@ public:
UnpackPathsList(packed, isTextEncoded, paths);
for (auto& entry : paths) {
- const TString path = std::get<0>(entry);
- const size_t size = std::get<1>(entry);
- if (size > fileSizeLimit) {
+ if (entry.Size > fileSizeLimit) {
ctx.AddError(TIssue(ctx.GetPosition(batch.Pos()),
- TStringBuilder() << "Size of object " << path << " = " << size << " and exceeds limit = " << fileSizeLimit << " specified for format " << formatName));
+ TStringBuilder() << "Size of object " << entry.Path << " = " << entry.Size << " and exceeds limit = " << fileSizeLimit << " specified for format " << formatName));
hasErr = true;
return false;
}
- totalSize += size;
+ totalSize += entry.Size;
++count;
}
}
diff --git a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp
index 166e097b98..cc5c313ac2 100644
--- a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp
+++ b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp
@@ -2,18 +2,21 @@
namespace NYql::NS3Details {
-void TFileTreeBuilder::AddPath(const TString& path, ui64 fileSize) {
+void TFileTreeBuilder::AddPath(const TString& path, ui64 fileSize, bool isDirectory) {
const auto parts = SplitPath(path);
- std::map<TString, TPath>* currentChildren = &Roots;
+ TPath::TFileTreeMap* currentChildren = &Roots;
for (size_t i = 0, size = parts.size(); i < size; ++i) {
- TPath& p = (*currentChildren)[parts[i]];
- if (i == size - 1) { // last
+ bool isSubDirectory = i != size - 1;
+ if (!isSubDirectory) {
+ TPath& p = (*currentChildren)[TTreeKey{parts[i], isDirectory}];
Y_VERIFY(p.FileSize == 0);
Y_VERIFY(!p.Read);
p.FileSize = fileSize;
p.Read = true;
} else {
+ TPath& p = (*currentChildren)[TTreeKey{parts[i], isSubDirectory}];
currentChildren = &p.Children;
+
}
}
}
@@ -24,8 +27,9 @@ void TFileTreeBuilder::Save(NS3::TRange* range) const {
}
}
-void TFileTreeBuilder::SaveImpl(NS3::TRange::TPath* path, const TString& name, const TPath& srcPath) const {
- path->SetName(name);
+void TFileTreeBuilder::SaveImpl(NS3::TRange::TPath* path, const TTreeKey& nodeKey, const TPath& srcPath) const {
+ path->SetName(nodeKey.Name);
+ path->SetIsDirectory(nodeKey.IsDirectory);
path->SetSize(srcPath.FileSize);
path->SetRead(srcPath.Read);
for (const auto& [n, p] : srcPath.Children) {
@@ -35,6 +39,9 @@ void TFileTreeBuilder::SaveImpl(NS3::TRange::TPath* path, const TString& name, c
std::vector<TString> TFileTreeBuilder::SplitPath(const TString& path) {
std::vector<TString> parts = StringSplitter(path).Split('/');
+ if (!path.empty() && path.back() == '/') {
+ parts.pop_back();
+ }
return parts;
}
diff --git a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h
index 363c60f78b..0398789d7b 100644
--- a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h
+++ b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h
@@ -9,22 +9,32 @@
namespace NYql::NS3Details {
class TFileTreeBuilder {
+
+ struct TTreeKey {
+ TString Name;
+ bool IsDirectory = false;
+
+ std::strong_ordering operator<=>(const TTreeKey& other) const = default;
+ };
+
struct TPath {
+ using TFileTreeMap = std::map<TTreeKey, TPath>;
+
ui64 FileSize = 0;
bool Read = false;
- std::map<TString, TPath> Children;
+ TFileTreeMap Children;
};
public:
- void AddPath(const TString& path, ui64 fileSize);
+ void AddPath(const TString& path, ui64 fileSize, bool isDirectory);
void Save(NS3::TRange* range) const;
private:
- void SaveImpl(NS3::TRange::TPath* path, const TString& name, const TPath& srcPath) const;
+ void SaveImpl(NS3::TRange::TPath* path, const TTreeKey& nodeKey, const TPath& srcPath) const;
static std::vector<TString> SplitPath(const TString& path);
private:
- std::map<TString, TPath> Roots;
+ TPath::TFileTreeMap Roots;
};
} // namespace NYql::NS3Details
diff --git a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp
index 8644c9acc0..cbecc1086a 100644
--- a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp
+++ b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp
@@ -18,7 +18,7 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) {
Y_UNIT_TEST(Simple) {
TFileTreeBuilder b;
- b.AddPath("name", 42);
+ b.AddPath("name", 42, false);
NS3::TRange range;
b.Save(&range);
@@ -27,68 +27,81 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) {
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetName(), "name");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetSize(), 42);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).ChildrenSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetIsDirectory(), false);
}
Y_UNIT_TEST(Interesting) {
TFileTreeBuilder b;
- b.AddPath("name", 42);
- b.AddPath("root/folder/file", 100500);
- b.AddPath("root2/file", 10);
- b.AddPath("root2", 42);
- b.AddPath("root/folder/other_file", 22);
- b.AddPath("root/file/", 12);
+ b.AddPath("name", 42, false);
+ b.AddPath("root/folder/file", 100500, false);
+ b.AddPath("root2/file", 10, false);
+ b.AddPath("root2", 42, false);
+ b.AddPath("root/folder/other_file", 22, false);
+ b.AddPath("root/file/", 0, true);
NS3::TRange range;
b.Save(&range);
- UNIT_ASSERT_VALUES_EQUAL(range.PathsSize(), 3);
+ UNIT_ASSERT_VALUES_EQUAL(range.PathsSize(), 4);
// name
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetName(), "name");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetSize(), 42);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).ChildrenSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetIsDirectory(), false);
- // root
+ // root/
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetName(), "root");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetSize(), 0);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).ChildrenSize(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetIsDirectory(), true);
// root/file/
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).ChildrenSize(), 1);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetName(), "file");
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetChildren(0).GetName(), "");
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetChildren(0).GetSize(), 12);
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetChildren(0).ChildrenSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetIsDirectory(), true);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).ChildrenSize(), 0);
- // root/folder
+ // root/folder/
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetName(), "folder");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetIsDirectory(), true);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).ChildrenSize(), 2);
// root/folder/file
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).GetName(), "file");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).GetSize(), 100500);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).GetIsDirectory(), false);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).ChildrenSize(), 0);
// root/folder/other_file
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).GetName(), "other_file");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).GetSize(), 22);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).GetIsDirectory(), false);
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).ChildrenSize(), 0);
// root2
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetName(), "root2");
UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetSize(), 42);
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).ChildrenSize(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetIsDirectory(), false);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).ChildrenSize(), 0);
+
+ // root2/
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetName(), "root2");
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetIsDirectory(), true);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).ChildrenSize(), 1);
// root2/file
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetChildren(0).GetName(), "file");
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetChildren(0).GetSize(), 10);
- UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetChildren(0).ChildrenSize(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).GetName(), "file");
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).GetSize(), 10);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).GetIsDirectory(), false);
+ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).ChildrenSize(), 0);
}
Y_UNIT_TEST(PassesFileWithZeroSize) {
TFileTreeBuilder b;
- b.AddPath("name", 0);
+ b.AddPath("name", 0, false);
NS3::TRange range;
b.Save(&range);
@@ -98,13 +111,14 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) {
ReadPathsList({}, MakeParams(range), paths, startPathIndex);
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 1);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "name");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 0);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "name");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 0);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false);
}
Y_UNIT_TEST(DeserializesManySlashes) {
TFileTreeBuilder b;
- b.AddPath("a///b", 42);
+ b.AddPath("a///b", 42, false);
NS3::TRange range;
b.Save(&range);
@@ -114,14 +128,15 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) {
ReadPathsList({}, MakeParams(range), paths, startPathIndex);
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 1);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "a///b");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "a///b");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false);
}
Y_UNIT_TEST(DeserializesTrailingSlash) {
TFileTreeBuilder b;
- b.AddPath("root/name//", 3);
- b.AddPath("root/name/", 0);
+ b.AddPath("root/name//", 3, true);
+ b.AddPath("root/name/", 0, true);
NS3::TRange range;
b.Save(&range);
@@ -131,17 +146,19 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) {
ReadPathsList({}, MakeParams(range), paths, startPathIndex);
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 2);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "root/name/");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 0);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "root/name/");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 0);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, true);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "root/name//");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 3);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "root/name//");
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 3);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, true);
}
Y_UNIT_TEST(DeserializesLeadingSlash) {
TFileTreeBuilder b;
- b.AddPath("/root/name", 3);
- b.AddPath("/", 42);
+ b.AddPath("/root/name", 3, false);
+ b.AddPath("/", 42, true);
NS3::TRange range;
b.Save(&range);
@@ -152,12 +169,14 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) {
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 2);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "/");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "/");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, true);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "/root/name");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 3);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "/root/name");
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 3);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, false);
}
}
-} // namespace NYql::NS3Details
+} // namespace NYql::NS3Details
\ No newline at end of file
diff --git a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp
index 4e5d9b6163..8401f249f8 100644
--- a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp
+++ b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp
@@ -24,7 +24,9 @@ static void BuildPathsFromTree(const google::protobuf::RepeatedPtrField<NYql::NS
const size_t prevSize = currentPath.size();
currentPath += path.GetName();
if (path.GetRead()) {
- paths.emplace_back(currentPath, path.GetSize());
+ auto isDirectory = path.GetIsDirectory();
+ auto readPath = isDirectory ? currentPath + "/" : currentPath;
+ paths.emplace_back(TPath{readPath, path.GetSize(), path.GetIsDirectory()});
}
BuildPathsFromTree(path.GetChildren(), paths, currentPath, currentDepth + 1);
currentPath.resize(prevSize);
@@ -53,19 +55,22 @@ void ReadPathsList(const NS3::TSource& sourceDesc, const THashMap<TString, TStri
const auto& path = range.GetDeprecatedPath().Get(i);
auto it = map.find(path);
YQL_ENSURE(it != map.end());
- paths.emplace_back(path, it->second);
+ paths.emplace_back(TPath{path, it->second, false});
}
} else {
for (auto i = 0; i < sourceDesc.GetDeprecatedPath().size(); ++i) {
- paths.emplace_back(sourceDesc.GetDeprecatedPath().Get(i).GetPath(), sourceDesc.GetDeprecatedPath().Get(i).GetSize());
+ paths.emplace_back(TPath{
+ sourceDesc.GetDeprecatedPath().Get(i).GetPath(),
+ sourceDesc.GetDeprecatedPath().Get(i).GetSize(),
+ false});
}
}
}
void PackPathsList(const TPathList& paths, TString& packed, bool& isTextEncoded) {
TFileTreeBuilder builder;
- for (auto& item : paths) {
- builder.AddPath(std::get<0>(item), std::get<1>(item));
+ for (const auto& [path, size, isDirectory] : paths) {
+ builder.AddPath(path, size, isDirectory);
}
NS3::TRange range;
builder.Save(&range);
diff --git a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h
index 8f67336878..478360c4e0 100644
--- a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h
+++ b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h
@@ -9,7 +9,16 @@
namespace NYql::NS3Details {
-using TPath = std::tuple<TString, size_t>;
+struct TPath {
+ TString Path;
+ size_t Size = 0;
+ bool IsDirectory = false;
+
+ TPath(TString path, size_t size, bool isDirectory)
+ : Path(std::move(path))
+ , Size(size)
+ , IsDirectory(isDirectory) { }
+};
using TPathList = std::vector<TPath>;
void ReadPathsList(const NS3::TSource& sourceDesc, const THashMap<TString, TString>& taskParams, TPathList& paths, ui64& startPathIndex);
diff --git a/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp b/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp
index a736e505d5..4fe81aa11f 100644
--- a/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp
+++ b/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp
@@ -38,11 +38,13 @@ Y_UNIT_TEST_SUITE(PathListReaderTest) {
UNIT_ASSERT_VALUES_EQUAL(startPathIndex, 42);
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 2);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "my/path");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 100500);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "my/path");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 100500);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "other/path");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 1);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "other/path");
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 1);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, false);
}
Y_UNIT_TEST(ReadsFilesListFromParamsAndSourceSettings) {
@@ -69,8 +71,9 @@ Y_UNIT_TEST_SUITE(PathListReaderTest) {
UNIT_ASSERT_VALUES_EQUAL(startPathIndex, 42);
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 1);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "my/path");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 100500);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "my/path");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 100500);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false);
}
NYql::NS3::TRange::TPath* SetPath(NYql::NS3::TRange::TPath* path, const TString& name = {}, ui64 size = 0, bool read = false) {
@@ -114,20 +117,25 @@ Y_UNIT_TEST_SUITE(PathListReaderTest) {
UNIT_ASSERT_VALUES_EQUAL(startPathIndex, 42);
UNIT_ASSERT_VALUES_EQUAL(paths.size(), 5);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "root");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 1);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "root");
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 1);
+ UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "root/folder/f1");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "root/folder/f1");
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, false);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[2]), "root/folder/f2");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[2]), 100500);
+ UNIT_ASSERT_VALUES_EQUAL(paths[2].Path, "root/folder/f2");
+ UNIT_ASSERT_VALUES_EQUAL(paths[2].Size, 100500);
+ UNIT_ASSERT_VALUES_EQUAL(paths[2].IsDirectory, false);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[3]), "root/f3");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[3]), 0);
+ UNIT_ASSERT_VALUES_EQUAL(paths[3].Path, "root/f3");
+ UNIT_ASSERT_VALUES_EQUAL(paths[3].Size, 0);
+ UNIT_ASSERT_VALUES_EQUAL(paths[3].IsDirectory, false);
- UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[4]), "root2/f4");
- UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[4]), 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[4].Path, "root2/f4");
+ UNIT_ASSERT_VALUES_EQUAL(paths[4].Size, 42);
+ UNIT_ASSERT_VALUES_EQUAL(paths[4].IsDirectory, false);
}
}