about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author auzhegov <auzhegov@yandex-team.com> 2023-04-20 16:05:35 +0300
committer auzhegov <auzhegov@yandex-team.com> 2023-04-20 16:05:35 +0300
commit ef238c42b52c0e459aff8a61d851a17df1eebbd3 (patch)
tree a7b2a76a9b4c61347beee1995953a576c58946ac
parent e9998b4f6415cafdc7daddedd89d1cbe25383163 (diff)
download ydb-ef238c42b52c0e459aff8a61d851a17df1eebbd3.tar.gz
URL using bad/illegal format or missing URL
Initial version
-rw-r--r-- ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp 19
-rw-r--r-- ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp 21
-rw-r--r-- ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt 1
-rw-r--r-- ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt 1
-rw-r--r-- ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt 1
-rw-r--r-- ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt 1
-rw-r--r-- ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp 9
7 files changed, 37 insertions, 16 deletions
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
index 4e51f549a1e..4bcdcadc749 100644
--- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
+++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
@@ -78,9 +78,9 @@
#ifdef THROW
#undef THROW
#endif
+#include <library/cpp/string_utils/quote/quote.h>
#include <library/cpp/xml/document/xml-document.h>
-
#define LOG_E(name, stream) \
LOG_ERROR_S(*TlsActivationContext, NKikimrServices::KQP_COMPUTE, name << ": " << this->SelfId() << ", TxId: " << TxId << ". " << stream)
#define LOG_W(name, stream) \
@@ -771,7 +771,8 @@ public:
auto id = index + StartPathIndex;
const TString requestId = CreateGuidAsString();
LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]");
- Gateway->Download(url,
+ Gateway->Download(
+ UrlEscapeRet(url, true),
IHTTPGateway::MakeYcHeaders(requestId, Token),
0U,
std::min(size, SizeLimit),
@@ -844,7 +845,7 @@ private:
actorSystem->Send(new IEventHandle(selfId, TActorId(), new TEvPrivate::TEvReadResult(std::move(result.Content), requestId, pathInd, path)));
} else {
actorSystem->Send(new IEventHandle(selfId, TActorId(), new TEvPrivate::TEvReadError(std::move(result.Issues), requestId, pathInd, path)));
- }
+ }
}
i64 GetAsyncInputData(TUnboxedValueVector& buffer, TMaybe<TInstant>&, bool& finished, i64 freeSpace) final {
@@ -1031,7 +1032,7 @@ struct TRetryStuff {
const TString& requestId,
const IRetryPolicy<long>::TPtr& retryPolicy
) : Gateway(std::move(gateway))
- , Url(std::move(url))
+ , Url(UrlEscapeRet(url, true))
, Headers(headers)
, Offset(0U)
, SizeLimit(sizeLimit)
@@ -1086,11 +1087,15 @@ void OnDownloadFinished(TActorSystem* actorSystem, const TActorId& self, const T
}
void DownloadStart(const TRetryStuff::TPtr& retryStuff, TActorSystem* actorSystem, const TActorId& self, const TActorId& parent, size_t pathIndex, const ::NMonitoring::TDynamicCounters::TCounterPtr& inflightCounter) {
- retryStuff->CancelHook = retryStuff->Gateway->Download(retryStuff->Url,
- retryStuff->Headers, retryStuff->Offset, retryStuff->SizeLimit,
+ retryStuff->CancelHook = retryStuff->Gateway->Download(
+ retryStuff->Url,
+ retryStuff->Headers,
+ retryStuff->Offset,
+ retryStuff->SizeLimit,
std::bind(&OnDownloadStart, actorSystem, self, parent, std::placeholders::_1),
std::bind(&OnNewData, actorSystem, self, parent, std::placeholders::_1),
- std::bind(&OnDownloadFinished, actorSystem, self, parent, pathIndex, std::placeholders::_1),
+ std::bind(
+ &OnDownloadFinished, actorSystem, self, parent, pathIndex, std::placeholders::_1),
inflightCounter);
}
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp
index ae2e143327a..fd627a702c7 100644
--- a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp
+++ b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp
@@ -25,6 +25,7 @@
#ifdef THROW
#undef THROW
#endif
+#include <library/cpp/string_utils/quote/quote.h>
#include <library/cpp/xml/document/xml-document.h>
@@ -62,10 +63,10 @@ struct TEvPrivate {
// Events
struct TEvUploadFinished : public TEventLocal<TEvUploadFinished, EvUploadFinished> {
- TEvUploadFinished(const TString& key, const TString& url, ui64 uploadSize)
+ TEvUploadFinished(const TString& key, const TString& url, ui64 uploadSize)
: Key(key), Url(url), UploadSize(uploadSize) {
}
- const TString Key, Url;
+ const TString Key, Url;
const ui64 UploadSize;
};
@@ -135,7 +136,7 @@ public:
const IRetryPolicy<long>::TPtr& retryPolicy,
bool dirtyWrite,
const TString& token)
- : TxId(txId)
+ : TxId(txId)
, Gateway(std::move(gateway))
, CredProvider(std::move(credProvider))
, RetryPolicy(retryPolicy)
@@ -364,7 +365,7 @@ private:
}
void FinalizeMultipartCommit() {
- Become(nullptr);
+ Become(nullptr);
if (DirtyWrite) {
CommitUploadedParts();
} else {
@@ -502,9 +503,15 @@ private:
const auto& key = MakePartitionKey(v);
const auto [keyIt, insertedNew] = FileWriteActors.emplace(key, std::vector<TS3FileWriteActor*>());
if (insertedNew || keyIt->second.empty() || keyIt->second.back()->IsFinishing()) {
- auto fileWrite = std::make_unique<TS3FileWriteActor>(TxId, Gateway, CredProvider, key, Url + Path + key + MakeOutputName() + Extension,
- Compression, RetryPolicy, DirtyWrite, Token);
- keyIt->second.emplace_back(fileWrite.get());
+ auto fileWrite = std::make_unique<TS3FileWriteActor>(
+ TxId,
+ Gateway,
+ CredProvider,
+ key,
+ UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension, true),
+ Compression,
+ RetryPolicy, DirtyWrite, Token);
+ keyIt->second.emplace_back(fileWrite.get());
RegisterWithSameMailbox(fileWrite.release());
}
diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt
index 1288c09a65a..5b00b830ac8 100644
--- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.darwin-x86_64.txt
@@ -19,6 +19,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC
yutil
tools-enum_parser-enum_serialization_runtime
contrib-libs-re2
+ cpp-string_utils-quote
cpp-xml-document
providers-common-http_gateway
library-yql-utils
diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt
index 6aa84460917..0155191e4bf 100644
--- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-aarch64.txt
@@ -20,6 +20,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC
yutil
tools-enum_parser-enum_serialization_runtime
contrib-libs-re2
+ cpp-string_utils-quote
cpp-xml-document
providers-common-http_gateway
library-yql-utils
diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt
index 6aa84460917..0155191e4bf 100644
--- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.linux-x86_64.txt
@@ -20,6 +20,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC
yutil
tools-enum_parser-enum_serialization_runtime
contrib-libs-re2
+ cpp-string_utils-quote
cpp-xml-document
providers-common-http_gateway
library-yql-utils
diff --git a/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt b/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt
index 1288c09a65a..5b00b830ac8 100644
--- a/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/providers/s3/object_listers/CMakeLists.windows-x86_64.txt
@@ -19,6 +19,7 @@ target_link_libraries(providers-s3-object_listers PUBLIC
yutil
tools-enum_parser-enum_serialization_runtime
contrib-libs-re2
+ cpp-string_utils-quote
cpp-xml-document
providers-common-http_gateway
library-yql-utils
diff --git a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp
index 81e1f95bf23..986a0b2a64b 100644
--- a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp
+++ b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp
@@ -7,9 +7,12 @@
#include <ydb/library/yql/utils/url_builder.h>
#include <ydb/library/yql/utils/yql_panic.h>
+#include <contrib/libs/re2/re2/re2.h>
+
#ifdef THROW
#undef THROW
#endif
+#include <library/cpp/string_utils/quote/quote.h>
#include <library/cpp/threading/future/async_semaphore.h>
#include <library/cpp/xml/document/xml-document.h>
#include <util/folder/iterator.h>
@@ -250,6 +253,8 @@ public:
auto [filter, checker] =
MakeFilter(listingRequest.Pattern, listingRequest.PatternType, sharedCtx);
+ auto request = listingRequest;
+ request.Url = UrlEscapeRet(request.Url, true);
auto ctx = TListingContext{
std::move(sharedCtx),
std::move(filter),
@@ -260,10 +265,11 @@ public:
IHTTPGateway::TWeakPtr(httpGateway),
GetHTTPDefaultRetryPolicy(),
CreateGuidAsString(),
- listingRequest,
+ std::move(request),
delimiter,
Nothing(),
MaxFilesPerQuery};
+
YQL_CLOG(TRACE, ProviderS3)
<< "[TS3Lister] Got URL: '" << ctx.ListingRequest.Url
<< "' with path prefix '" << ctx.ListingRequest.Prefix
@@ -276,7 +282,6 @@ public:
}
~TS3Lister() override = default;
-
private:
static void SubmitRequestIntoGateway(TListingContext& ctx) {
IHTTPGateway::THeaders headers;