diff options
author | auzhegov <auzhegov@yandex-team.com> | 2023-04-20 16:05:35 +0300 |
---|---|---|
committer | auzhegov <auzhegov@yandex-team.com> | 2023-04-20 16:05:35 +0300 |
commit | ef238c42b52c0e459aff8a61d851a17df1eebbd3 (patch) | |
tree | a7b2a76a9b4c61347beee1995953a576c58946ac | |
parent | e9998b4f6415cafdc7daddedd89d1cbe25383163 (diff) | |
download | ydb-ef238c42b52c0e459aff8a61d851a17df1eebbd3.tar.gz |
URL using bad/illegal format or missing URL
Initial version
7 files changed, 37 insertions, 16 deletions
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp index 4e51f549a1e..4bcdcadc749 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp @@ -78,9 +78,9 @@ #ifdef THROW #undef THROW #endif +#include <library/cpp/string_utils/quote/quote.h> #include <library/cpp/xml/document/xml-document.h> - #define LOG_E(name, stream) \ LOG_ERROR_S(*TlsActivationContext, NKikimrServices::KQP_COMPUTE, name << ": " << this->SelfId() << ", TxId: " << TxId << ". " << stream) #define LOG_W(name, stream) \ @@ -771,7 +771,8 @@ public: auto id = index + StartPathIndex; const TString requestId = CreateGuidAsString(); LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]"); - Gateway->Download(url, + Gateway->Download( + UrlEscapeRet(url, true), IHTTPGateway::MakeYcHeaders(requestId, Token), 0U, std::min(size, SizeLimit), @@ -844,7 +845,7 @@ private: actorSystem->Send(new IEventHandle(selfId, TActorId(), new TEvPrivate::TEvReadResult(std::move(result.Content), requestId, pathInd, path))); } else { actorSystem->Send(new IEventHandle(selfId, TActorId(), new TEvPrivate::TEvReadError(std::move(result.Issues), requestId, pathInd, path))); - } + } } i64 GetAsyncInputData(TUnboxedValueVector& buffer, TMaybe<TInstant>&, bool& finished, i64 freeSpace) final { @@ -1031,7 +1032,7 @@ struct TRetryStuff { const TString& requestId, const IRetryPolicy<long>::TPtr& retryPolicy ) : Gateway(std::move(gateway)) - , Url(std::move(url)) + , Url(UrlEscapeRet(url, true)) , Headers(headers) , Offset(0U) , SizeLimit(sizeLimit) @@ -1086,11 +1087,15 @@ void OnDownloadFinished(TActorSystem* actorSystem, const TActorId& self, const T } void DownloadStart(const TRetryStuff::TPtr& retryStuff, TActorSystem* actorSystem, const TActorId& self, const TActorId& parent, size_t pathIndex, const ::NMonitoring::TDynamicCounters::TCounterPtr& inflightCounter) { - retryStuff->CancelHook = retryStuff->Gateway->Download(retryStuff->Url, - retryStuff->Headers, retryStuff->Offset, retryStuff->SizeLimit, + retryStuff->CancelHook = retryStuff->Gateway->Download( + retryStuff->Url, + retryStuff->Headers, + retryStuff->Offset, + retryStuff->SizeLimit, std::bind(&OnDownloadStart, actorSystem, self, parent, std::placeholders::_1), std::bind(&OnNewData, actorSystem, self, parent, std::placeholders::_1), - std::bind(&OnDownloadFinished, actorSystem, self, parent, pathIndex, std::placeholders::_1), + std::bind( + &OnDownloadFinished, actorSystem, self, parent, pathIndex, std::placeholders::_1), inflightCounter); } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp index ae2e143327a..fd627a702c7 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp @@ -25,6 +25,7 @@ #ifdef THROW #undef THROW #endif +#include <library/cpp/string_utils/quote/quote.h> #include <library/cpp/xml/document/xml-document.h> @@ -62,10 +63,10 @@ struct TEvPrivate { // Events struct TEvUploadFinished : public TEventLocal<TEvUploadFinished, EvUploadFinished> { - TEvUploadFinished(const TString& key, const TString& url, ui64 uploadSize) + TEvUploadFinished(const TString& key, const TString& url, ui64 uploadSize) : Key(key), Url(url), UploadSize(uploadSize) { } - const TString Key, Url; + const TString Key, Url; const ui64 UploadSize; }; @@ -135,7 +136,7 @@ public: const IRetryPolicy<long>::TPtr& retryPolicy, bool dirtyWrite, const TString& token) - : TxId(txId) + : TxId(txId) , Gateway(std::move(gateway)) , CredProvider(std::move(credProvider)) , RetryPolicy(retryPolicy) @@ -364,7 +365,7 @@ private: } void FinalizeMultipartCommit() { - Become(nullptr); + Become(nullptr); if (DirtyWrite) { CommitUploadedParts(); } else { @@ -502,9 +503,15 @@ private: const auto& key = MakePartitionKey(v); const auto [keyIt, insertedNew] = FileWriteActors.emplace(key, std::vector<TS3FileWriteActor*>()); if (insertedNew || keyIt->second.empty() || keyIt->second.back()->IsFinishing()) { - auto fileWrite = std::make_unique<TS3FileWriteActor>(TxId, Gateway, CredProvider, key, Url + Path + key + MakeOutputName() + Extension, - Compression, RetryPolicy, DirtyWrite, Token); - keyIt->second.emplace_back(fileWrite.get()); + auto fileWrite = std::make_unique<TS3FileWriteActor>( + TxId, + Gateway, + CredProvider, + key, + UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension, true), + Compression, + RetryPolicy, DirtyWrite, Token); + keyIt->second.emplace_back(fileWrite.get()); RegisterWithSameMailbox(fileWrite.release()); } diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt index 1288c09a65a..5b00b830ac8 100644 --- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt @@ -19,6 +19,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC yutil tools-enum_parser-enum_serialization_runtime contrib-libs-re2 + cpp-string_utils-quote cpp-xml-document providers-common-http_gateway library-yql-utils diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt index 6aa84460917..0155191e4bf 100644 --- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt @@ -20,6 +20,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC yutil tools-enum_parser-enum_serialization_runtime contrib-libs-re2 + cpp-string_utils-quote cpp-xml-document providers-common-http_gateway library-yql-utils diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt index 6aa84460917..0155191e4bf 100644 --- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt @@ -20,6 +20,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC yutil tools-enum_parser-enum_serialization_runtime contrib-libs-re2 + cpp-string_utils-quote cpp-xml-document providers-common-http_gateway library-yql-utils diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt index 1288c09a65a..5b00b830ac8 100644 --- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt @@ -19,6 +19,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC yutil tools-enum_parser-enum_serialization_runtime contrib-libs-re2 + cpp-string_utils-quote cpp-xml-document providers-common-http_gateway library-yql-utils diff --git a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp index 81e1f95bf23..986a0b2a64b 100644 --- a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp +++ b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp @@ -7,9 +7,12 @@ #include <ydb/library/yql/utils/url_builder.h> #include <ydb/library/yql/utils/yql_panic.h> +#include <contrib/libs/re2/re2/re2.h> + #ifdef THROW #undef THROW #endif +#include <library/cpp/string_utils/quote/quote.h> #include <library/cpp/threading/future/async_semaphore.h> #include <library/cpp/xml/document/xml-document.h> #include <util/folder/iterator.h> @@ -250,6 +253,8 @@ public: auto [filter, checker] = MakeFilter(listingRequest.Pattern, listingRequest.PatternType, sharedCtx); + auto request = listingRequest; + request.Url = UrlEscapeRet(request.Url, true); auto ctx = TListingContext{ std::move(sharedCtx), std::move(filter), @@ -260,10 +265,11 @@ public: IHTTPGateway::TWeakPtr(httpGateway), GetHTTPDefaultRetryPolicy(), CreateGuidAsString(), - listingRequest, + std::move(request), delimiter, Nothing(), MaxFilesPerQuery}; + YQL_CLOG(TRACE, ProviderS3) << "[TS3Lister] Got URL: '" << ctx.ListingRequest.Url << "' with path prefix '" << ctx.ListingRequest.Prefix @@ -276,7 +282,6 @@ public: } ~TS3Lister() override = default; - private: static void SubmitRequestIntoGateway(TListingContext& ctx) { IHTTPGateway::THeaders headers; |