diff options
author | auzhegov <auzhegov@yandex-team.com> | 2023-02-16 12:28:29 +0300 |
---|---|---|
committer | auzhegov <auzhegov@yandex-team.com> | 2023-02-16 12:28:29 +0300 |
commit | a550b15e307f7b41ef2cca24df4a4fd80efdbbc6 (patch) | |
tree | 51afb79bc436128ccfae2886e896eedd588371d2 | |
parent | 463633625b0cd4f6dac71bb122e40d0c2bd856d7 (diff) | |
download | ydb-a550b15e307f7b41ef2cca24df4a4fd80efdbbc6.tar.gz |
[ListingLargeDirectories] Change message format
Initial implementation
12 files changed, 153 insertions, 91 deletions
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp index 81ba304ce2..d29c26cd8d 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp @@ -310,11 +310,11 @@ public: void StartDownload(size_t index) { DownloadInflight++; const TPath& path = Paths[index]; - auto url = Url + std::get<TString>(path); + auto url = Url + path.Path; auto id = index + StartPathIndex; const TString requestId = CreateGuidAsString(); LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]"); - Gateway->Download(url, MakeHeaders(Token, requestId), 0U, std::min(std::get<size_t>(path), SizeLimit), + Gateway->Download(url, MakeHeaders(Token, requestId), 0U, std::min(path.Size, SizeLimit), std::bind(&TS3ReadActor::OnDownloadFinished, ActorSystem, SelfId(), requestId, std::placeholders::_1, id), {}, RetryPolicy); } @@ -392,7 +392,7 @@ private: void Handle(TEvPrivate::TEvReadResult::TPtr& result) { ++IsDoneCounter; const auto id = result->Get()->PathIndex; - const auto path = std::get<TString>(Paths[id - StartPathIndex]); + const auto path = Paths[id - StartPathIndex].Path; const auto httpCode = result->Get()->Result.HttpResponseCode; const auto requestId = result->Get()->RequestId; IngressBytes += result->Get()->Result.size(); @@ -427,7 +427,7 @@ private: ++IsDoneCounter; auto id = result->Get()->PathIndex; const auto requestId = result->Get()->RequestId; - const auto path = std::get<TString>(Paths[id - StartPathIndex]); + const auto path = Paths[id - StartPathIndex].Path; LOG_W("TS3ReadActor", "Error while reading file " << path << ", details: ID: " << id << ", TEvReadError: " << result->Get()->Error.ToOneLineString() << ", request id: [" << requestId << "]"); auto issues = NS3Util::AddParentIssue(TStringBuilder{} << "Error while reading file " << path << " with request id [" << requestId << "]", TIssues{result->Get()->Error}); Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR)); @@ -1266,15 +1266,14 @@ public: DownloadInflight++; const TPath& path = Paths[index]; const TString requestId = CreateGuidAsString(); - ui64 fileSize = std::get<std::size_t>(path); - auto stuff = std::make_shared<TRetryStuff>(Gateway, Url + std::get<TString>(path), MakeHeaders(Token, requestId), fileSize, TxId, requestId, RetryPolicy); + auto stuff = std::make_shared<TRetryStuff>(Gateway, Url + path.Path, MakeHeaders(Token, requestId), path.Size, TxId, requestId, RetryPolicy); auto pathIndex = index + StartPathIndex; RetryStuffForFile.emplace(pathIndex, stuff); if (TaskCounters) { HttpInflightLimit->Add(Gateway->GetBuffersSizePerStream()); } ::NMonitoring::TDynamicCounters::TCounterPtr inflightCounter; - auto impl = MakeHolder<TS3ReadCoroImpl>(InputIndex, TxId, ComputeActorId, stuff, ReadSpec, pathIndex, std::get<TString>(path), Url, MaxBlocksInFly, ArrowReader, ReadActorFactoryCfg, DeferredQueueSize, HttpInflightSize, HttpDataRps); + auto impl = MakeHolder<TS3ReadCoroImpl>(InputIndex, TxId, ComputeActorId, stuff, ReadSpec, pathIndex, path.Path, Url, MaxBlocksInFly, ArrowReader, ReadActorFactoryCfg, DeferredQueueSize, HttpInflightSize, HttpDataRps); CoroActors.insert(RegisterWithSameMailbox(std::make_unique<TS3ReadCoroActor>(std::move(impl), std::move(stuff), pathIndex, impl->IsDownloadNeeded(), impl->HttpInflightSize).release())); } diff --git a/ydb/library/yql/providers/s3/proto/range.proto b/ydb/library/yql/providers/s3/proto/range.proto index 975c0d559e..9e3ef2c58e 100644 --- a/ydb/library/yql/providers/s3/proto/range.proto +++ b/ydb/library/yql/providers/s3/proto/range.proto @@ -13,5 +13,6 @@ message TRange { repeated TPath Children = 2; uint64 Size = 3; bool Read = 4; // Read this path + bool IsDirectory = 5; } } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp index 2b71b9d0cd..cb708b15fe 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp @@ -22,7 +22,7 @@ bool ValidateS3PackedPaths(TPositionHandle pos, TStringBuf blob, bool isTextEnco TPathList paths; UnpackPathsList(blob, isTextEncoded, paths); for (size_t i = 0; i < paths.size(); ++i) { - if (std::get<0>(paths[i]).empty()) { + if (paths[i].Path.empty()) { ctx.AddError(TIssue(ctx.GetPosition(pos), TStringBuilder() << "Expected non-empty path (index " << i << ")")); return false; } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp index 56919944b9..28cabdb079 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp @@ -62,17 +62,20 @@ public: ui64 Partition(const TDqSettings&, size_t maxPartitions, const TExprNode& node, TVector<TString>& partitions, TString*, TExprContext&, bool) override { TString cluster; - std::vector<std::vector<std::pair<TString, ui64>>> parts; + std::vector<std::vector<TPath>> parts; if (const TMaybeNode<TDqSource> source = &node) { cluster = source.Cast().DataSource().Cast<TS3DataSource>().Cluster().Value(); const auto settings = source.Cast().Settings().Cast<TS3SourceSettingsBase>(); for (auto i = 0u; i < settings.Paths().Size(); ++i) { - const auto& path = settings.Paths().Item(i); + const auto& packed = settings.Paths().Item(i); TPathList paths; - UnpackPathsList(path.Data().Literal().Value(), FromString<bool>(path.IsText().Literal().Value()), paths); + UnpackPathsList( + packed.Data().Literal().Value(), + FromString<bool>(packed.IsText().Literal().Value()), + paths); parts.reserve(parts.size() + paths.size()); - for (auto& p : paths) { - parts.emplace_back(1U, std::pair(std::get<0>(p), std::get<1>(p))); + for (const auto& path : paths) { + parts.emplace_back(1U, path); } } } @@ -104,7 +107,10 @@ public: NS3::TRange range; range.SetStartPathIndex(startIdx); TFileTreeBuilder builder; - std::for_each(part.cbegin(), part.cend(), [&builder, &startIdx](const std::pair<TString, ui64>& f) { builder.AddPath(f.first, f.second); ++startIdx; }); + std::for_each(part.cbegin(), part.cend(), [&builder, &startIdx](const TPath& f) { + builder.AddPath(f.Path, f.Size, f.IsDirectory); + ++startIdx; + }); builder.Save(&range); partitions.emplace_back(); diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp index 44d463de94..976f3e3ff2 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp @@ -177,7 +177,7 @@ private: const auto& listEntries = std::get<IS3Lister::TListEntries>(listResult); for (auto& entry : listEntries) { - listedPaths.emplace_back(entry.Path, entry.Size); + listedPaths.emplace_back(entry.Path, entry.Size, false); } } @@ -340,7 +340,7 @@ private: } auto& pathList = pathsByExtraValues[extraValues]; - pathList.emplace_back(entry.Path, entry.Size); + pathList.emplace_back(entry.Path, entry.Size, false); readSize += entry.Size; } @@ -512,7 +512,7 @@ private: req.Token = tokenStr; req.Url = url; for (const auto& directory : directories) { - req.Pattern = NS3::NormalizePath(TStringBuilder() << std::get<0>(directory) << "/" << effectiveFilePattern); + req.Pattern = NS3::NormalizePath(TStringBuilder() << directory.Path << "/" << effectiveFilePattern); RequestsByNode_[source.Raw()].push_back(req); if (PendingRequests_.find(req) == PendingRequests_.end()) { auto future = Lister_->List(req.Token, req.Url, req.Pattern); diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp index 551c0e1d80..e62c23fab9 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_logical_opt.cpp @@ -241,15 +241,13 @@ public: UnpackPathsList(packed, isTextEncoded, paths); for (auto& entry : paths) { - const TString path = std::get<0>(entry); - const size_t size = std::get<1>(entry); - if (size > fileSizeLimit) { + if (entry.Size > fileSizeLimit) { ctx.AddError(TIssue(ctx.GetPosition(batch.Pos()), - TStringBuilder() << "Size of object " << path << " = " << size << " and exceeds limit = " << fileSizeLimit << " specified for format " << formatName)); + TStringBuilder() << "Size of object " << entry.Path << " = " << entry.Size << " and exceeds limit = " << fileSizeLimit << " specified for format " << formatName)); hasErr = true; return false; } - totalSize += size; + totalSize += entry.Size; ++count; } } diff --git a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp index 166e097b98..cc5c313ac2 100644 --- a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp +++ b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.cpp @@ -2,18 +2,21 @@ namespace NYql::NS3Details { -void TFileTreeBuilder::AddPath(const TString& path, ui64 fileSize) { +void TFileTreeBuilder::AddPath(const TString& path, ui64 fileSize, bool isDirectory) { const auto parts = SplitPath(path); - std::map<TString, TPath>* currentChildren = &Roots; + TPath::TFileTreeMap* currentChildren = &Roots; for (size_t i = 0, size = parts.size(); i < size; ++i) { - TPath& p = (*currentChildren)[parts[i]]; - if (i == size - 1) { // last + bool isSubDirectory = i != size - 1; + if (!isSubDirectory) { + TPath& p = (*currentChildren)[TTreeKey{parts[i], isDirectory}]; Y_VERIFY(p.FileSize == 0); Y_VERIFY(!p.Read); p.FileSize = fileSize; p.Read = true; } else { + TPath& p = (*currentChildren)[TTreeKey{parts[i], isSubDirectory}]; currentChildren = &p.Children; + } } } @@ -24,8 +27,9 @@ void TFileTreeBuilder::Save(NS3::TRange* range) const { } } -void TFileTreeBuilder::SaveImpl(NS3::TRange::TPath* path, const TString& name, const TPath& srcPath) const { - path->SetName(name); +void TFileTreeBuilder::SaveImpl(NS3::TRange::TPath* path, const TTreeKey& nodeKey, const TPath& srcPath) const { + path->SetName(nodeKey.Name); + path->SetIsDirectory(nodeKey.IsDirectory); path->SetSize(srcPath.FileSize); path->SetRead(srcPath.Read); for (const auto& [n, p] : srcPath.Children) { @@ -35,6 +39,9 @@ void TFileTreeBuilder::SaveImpl(NS3::TRange::TPath* path, const TString& name, c std::vector<TString> TFileTreeBuilder::SplitPath(const TString& path) { std::vector<TString> parts = StringSplitter(path).Split('/'); + if (!path.empty() && path.back() == '/') { + parts.pop_back(); + } return parts; } diff --git a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h index 363c60f78b..0398789d7b 100644 --- a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h +++ b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h @@ -9,22 +9,32 @@ namespace NYql::NS3Details { class TFileTreeBuilder { + + struct TTreeKey { + TString Name; + bool IsDirectory = false; + + std::strong_ordering operator<=>(const TTreeKey& other) const = default; + }; + struct TPath { + using TFileTreeMap = std::map<TTreeKey, TPath>; + ui64 FileSize = 0; bool Read = false; - std::map<TString, TPath> Children; + TFileTreeMap Children; }; public: - void AddPath(const TString& path, ui64 fileSize); + void AddPath(const TString& path, ui64 fileSize, bool isDirectory); void Save(NS3::TRange* range) const; private: - void SaveImpl(NS3::TRange::TPath* path, const TString& name, const TPath& srcPath) const; + void SaveImpl(NS3::TRange::TPath* path, const TTreeKey& nodeKey, const TPath& srcPath) const; static std::vector<TString> SplitPath(const TString& path); private: - std::map<TString, TPath> Roots; + TPath::TFileTreeMap Roots; }; } // namespace NYql::NS3Details diff --git a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp index 8644c9acc0..cbecc1086a 100644 --- a/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp +++ b/ydb/library/yql/providers/s3/range_helpers/file_tree_builder_ut.cpp @@ -18,7 +18,7 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) { Y_UNIT_TEST(Simple) { TFileTreeBuilder b; - b.AddPath("name", 42); + b.AddPath("name", 42, false); NS3::TRange range; b.Save(&range); @@ -27,68 +27,81 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) { UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetName(), "name"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetSize(), 42); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).ChildrenSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetIsDirectory(), false); } Y_UNIT_TEST(Interesting) { TFileTreeBuilder b; - b.AddPath("name", 42); - b.AddPath("root/folder/file", 100500); - b.AddPath("root2/file", 10); - b.AddPath("root2", 42); - b.AddPath("root/folder/other_file", 22); - b.AddPath("root/file/", 12); + b.AddPath("name", 42, false); + b.AddPath("root/folder/file", 100500, false); + b.AddPath("root2/file", 10, false); + b.AddPath("root2", 42, false); + b.AddPath("root/folder/other_file", 22, false); + b.AddPath("root/file/", 0, true); NS3::TRange range; b.Save(&range); - UNIT_ASSERT_VALUES_EQUAL(range.PathsSize(), 3); + UNIT_ASSERT_VALUES_EQUAL(range.PathsSize(), 4); // name UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetName(), "name"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetSize(), 42); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).ChildrenSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(0).GetIsDirectory(), false); - // root + // root/ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetName(), "root"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetSize(), 0); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).ChildrenSize(), 2); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetIsDirectory(), true); // root/file/ - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).ChildrenSize(), 1); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetName(), "file"); - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetChildren(0).GetName(), ""); - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetChildren(0).GetSize(), 12); - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetChildren(0).ChildrenSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).GetIsDirectory(), true); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(0).ChildrenSize(), 0); - // root/folder + // root/folder/ UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetName(), "folder"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetIsDirectory(), true); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).ChildrenSize(), 2); // root/folder/file UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).GetName(), "file"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).GetSize(), 100500); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).GetIsDirectory(), false); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(0).ChildrenSize(), 0); // root/folder/other_file UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).GetName(), "other_file"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).GetSize(), 22); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).GetIsDirectory(), false); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(1).GetChildren(1).GetChildren(1).ChildrenSize(), 0); // root2 UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetName(), "root2"); UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetSize(), 42); - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).ChildrenSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetIsDirectory(), false); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).ChildrenSize(), 0); + + // root2/ + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetName(), "root2"); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetIsDirectory(), true); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).ChildrenSize(), 1); // root2/file - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetChildren(0).GetName(), "file"); - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetChildren(0).GetSize(), 10); - UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(2).GetChildren(0).ChildrenSize(), 0); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).GetName(), "file"); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).GetSize(), 10); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).GetIsDirectory(), false); + UNIT_ASSERT_VALUES_EQUAL(range.GetPaths(3).GetChildren(0).ChildrenSize(), 0); } Y_UNIT_TEST(PassesFileWithZeroSize) { TFileTreeBuilder b; - b.AddPath("name", 0); + b.AddPath("name", 0, false); NS3::TRange range; b.Save(&range); @@ -98,13 +111,14 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) { ReadPathsList({}, MakeParams(range), paths, startPathIndex); UNIT_ASSERT_VALUES_EQUAL(paths.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "name"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 0); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "name"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 0); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false); } Y_UNIT_TEST(DeserializesManySlashes) { TFileTreeBuilder b; - b.AddPath("a///b", 42); + b.AddPath("a///b", 42, false); NS3::TRange range; b.Save(&range); @@ -114,14 +128,15 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) { ReadPathsList({}, MakeParams(range), paths, startPathIndex); UNIT_ASSERT_VALUES_EQUAL(paths.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "a///b"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 42); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "a///b"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 42); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false); } Y_UNIT_TEST(DeserializesTrailingSlash) { TFileTreeBuilder b; - b.AddPath("root/name//", 3); - b.AddPath("root/name/", 0); + b.AddPath("root/name//", 3, true); + b.AddPath("root/name/", 0, true); NS3::TRange range; b.Save(&range); @@ -131,17 +146,19 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) { ReadPathsList({}, MakeParams(range), paths, startPathIndex); UNIT_ASSERT_VALUES_EQUAL(paths.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "root/name/"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 0); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "root/name/"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 0); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, true); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "root/name//"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 3); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "root/name//"); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 3); + UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, true); } Y_UNIT_TEST(DeserializesLeadingSlash) { TFileTreeBuilder b; - b.AddPath("/root/name", 3); - b.AddPath("/", 42); + b.AddPath("/root/name", 3, false); + b.AddPath("/", 42, true); NS3::TRange range; b.Save(&range); @@ -152,12 +169,14 @@ Y_UNIT_TEST_SUITE(S3FileTreeBuilderTest) { UNIT_ASSERT_VALUES_EQUAL(paths.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "/"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 42); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "/"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 42); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, true); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "/root/name"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 3); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "/root/name"); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 3); + UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, false); } } -} // namespace NYql::NS3Details +} // namespace NYql::NS3Details
\ No newline at end of file diff --git a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp index 4e5d9b6163..8401f249f8 100644 --- a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp +++ b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.cpp @@ -24,7 +24,9 @@ static void BuildPathsFromTree(const google::protobuf::RepeatedPtrField<NYql::NS const size_t prevSize = currentPath.size(); currentPath += path.GetName(); if (path.GetRead()) { - paths.emplace_back(currentPath, path.GetSize()); + auto isDirectory = path.GetIsDirectory(); + auto readPath = isDirectory ? currentPath + "/" : currentPath; + paths.emplace_back(TPath{readPath, path.GetSize(), path.GetIsDirectory()}); } BuildPathsFromTree(path.GetChildren(), paths, currentPath, currentDepth + 1); currentPath.resize(prevSize); @@ -53,19 +55,22 @@ void ReadPathsList(const NS3::TSource& sourceDesc, const THashMap<TString, TStri const auto& path = range.GetDeprecatedPath().Get(i); auto it = map.find(path); YQL_ENSURE(it != map.end()); - paths.emplace_back(path, it->second); + paths.emplace_back(TPath{path, it->second, false}); } } else { for (auto i = 0; i < sourceDesc.GetDeprecatedPath().size(); ++i) { - paths.emplace_back(sourceDesc.GetDeprecatedPath().Get(i).GetPath(), sourceDesc.GetDeprecatedPath().Get(i).GetSize()); + paths.emplace_back(TPath{ + sourceDesc.GetDeprecatedPath().Get(i).GetPath(), + sourceDesc.GetDeprecatedPath().Get(i).GetSize(), + false}); } } } void PackPathsList(const TPathList& paths, TString& packed, bool& isTextEncoded) { TFileTreeBuilder builder; - for (auto& item : paths) { - builder.AddPath(std::get<0>(item), std::get<1>(item)); + for (const auto& [path, size, isDirectory] : paths) { + builder.AddPath(path, size, isDirectory); } NS3::TRange range; builder.Save(&range); diff --git a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h index 8f67336878..478360c4e0 100644 --- a/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h +++ b/ydb/library/yql/providers/s3/range_helpers/path_list_reader.h @@ -9,7 +9,16 @@ namespace NYql::NS3Details { -using TPath = std::tuple<TString, size_t>; +struct TPath { + TString Path; + size_t Size = 0; + bool IsDirectory = false; + + TPath(TString path, size_t size, bool isDirectory) + : Path(std::move(path)) + , Size(size) + , IsDirectory(isDirectory) { } +}; using TPathList = std::vector<TPath>; void ReadPathsList(const NS3::TSource& sourceDesc, const THashMap<TString, TString>& taskParams, TPathList& paths, ui64& startPathIndex); diff --git a/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp b/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp index a736e505d5..4fe81aa11f 100644 --- a/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp +++ b/ydb/library/yql/providers/s3/range_helpers/path_list_reader_ut.cpp @@ -38,11 +38,13 @@ Y_UNIT_TEST_SUITE(PathListReaderTest) { UNIT_ASSERT_VALUES_EQUAL(startPathIndex, 42); UNIT_ASSERT_VALUES_EQUAL(paths.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "my/path"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 100500); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "my/path"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 100500); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "other/path"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 1); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "other/path"); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 1); + UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, false); } Y_UNIT_TEST(ReadsFilesListFromParamsAndSourceSettings) { @@ -69,8 +71,9 @@ Y_UNIT_TEST_SUITE(PathListReaderTest) { UNIT_ASSERT_VALUES_EQUAL(startPathIndex, 42); UNIT_ASSERT_VALUES_EQUAL(paths.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "my/path"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 100500); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "my/path"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 100500); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false); } NYql::NS3::TRange::TPath* SetPath(NYql::NS3::TRange::TPath* path, const TString& name = {}, ui64 size = 0, bool read = false) { @@ -114,20 +117,25 @@ Y_UNIT_TEST_SUITE(PathListReaderTest) { UNIT_ASSERT_VALUES_EQUAL(startPathIndex, 42); UNIT_ASSERT_VALUES_EQUAL(paths.size(), 5); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[0]), "root"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[0]), 1); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Path, "root"); + UNIT_ASSERT_VALUES_EQUAL(paths[0].Size, 1); + UNIT_ASSERT_VALUES_EQUAL(paths[0].IsDirectory, false); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[1]), "root/folder/f1"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[1]), 42); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Path, "root/folder/f1"); + UNIT_ASSERT_VALUES_EQUAL(paths[1].Size, 42); + UNIT_ASSERT_VALUES_EQUAL(paths[1].IsDirectory, false); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[2]), "root/folder/f2"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[2]), 100500); + UNIT_ASSERT_VALUES_EQUAL(paths[2].Path, "root/folder/f2"); + UNIT_ASSERT_VALUES_EQUAL(paths[2].Size, 100500); + UNIT_ASSERT_VALUES_EQUAL(paths[2].IsDirectory, false); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[3]), "root/f3"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[3]), 0); + UNIT_ASSERT_VALUES_EQUAL(paths[3].Path, "root/f3"); + UNIT_ASSERT_VALUES_EQUAL(paths[3].Size, 0); + UNIT_ASSERT_VALUES_EQUAL(paths[3].IsDirectory, false); - UNIT_ASSERT_VALUES_EQUAL(std::get<TString>(paths[4]), "root2/f4"); - UNIT_ASSERT_VALUES_EQUAL(std::get<ui64>(paths[4]), 42); + UNIT_ASSERT_VALUES_EQUAL(paths[4].Path, "root2/f4"); + UNIT_ASSERT_VALUES_EQUAL(paths[4].Size, 42); + UNIT_ASSERT_VALUES_EQUAL(paths[4].IsDirectory, false); } } |