aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorudovichenko-r <rvu@ydb.tech>2022-10-17 10:18:12 +0300
committerudovichenko-r <rvu@ydb.tech>2022-10-17 10:18:12 +0300
commit99884e915f4d3edcb1588b61fa689659e1a8e1d0 (patch)
tree9994ea8a54d1b16ab4484ac77b80729754db53f6
parentbbe39d44d0036e8b67f8204f00658cf38d8bda65 (diff)
downloadydb-99884e915f4d3edcb1588b61fa689659e1a8e1d0.tar.gz
[yql] Refactor file storage
-rw-r--r--ydb/library/yql/core/file_storage/CMakeLists.txt4
-rw-r--r--ydb/library/yql/core/file_storage/defs/downloader.h2
-rw-r--r--ydb/library/yql/core/file_storage/file_storage.cpp166
-rw-r--r--ydb/library/yql/core/file_storage/file_storage.h15
-rw-r--r--ydb/library/yql/core/file_storage/file_storage_decorator.cpp42
-rw-r--r--ydb/library/yql/core/file_storage/file_storage_decorator.h26
-rw-r--r--ydb/library/yql/core/file_storage/file_storage_ut.cpp53
-rw-r--r--ydb/library/yql/core/file_storage/http_download/CMakeLists.txt2
-rw-r--r--ydb/library/yql/core/file_storage/http_download/http_download.cpp36
-rw-r--r--ydb/library/yql/core/file_storage/http_download/http_download.h2
-rw-r--r--ydb/library/yql/core/file_storage/http_download/pattern_group.cpp34
-rw-r--r--ydb/library/yql/core/file_storage/http_download/pattern_group.h20
-rw-r--r--ydb/library/yql/core/file_storage/http_download/proto/http_download.proto2
-rw-r--r--ydb/library/yql/core/file_storage/proto/file_storage.proto4
-rw-r--r--ydb/library/yql/core/file_storage/url_mapper.cpp19
-rw-r--r--ydb/library/yql/core/file_storage/url_mapper.h32
-rw-r--r--ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt3
-rw-r--r--ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt3
-rw-r--r--ydb/library/yql/providers/common/proto/gateways_config.proto16
-rw-r--r--ydb/library/yql/utils/CMakeLists.txt1
-rw-r--r--ydb/library/yql/utils/test_http_server/CMakeLists.txt19
-rw-r--r--ydb/library/yql/utils/test_http_server/test_http_server.cpp (renamed from ydb/library/yql/core/file_storage/ut/test_http_server.cpp)5
-rw-r--r--ydb/library/yql/utils/test_http_server/test_http_server.h (renamed from ydb/library/yql/core/file_storage/ut/test_http_server.h)4
23 files changed, 226 insertions, 284 deletions
diff --git a/ydb/library/yql/core/file_storage/CMakeLists.txt b/ydb/library/yql/core/file_storage/CMakeLists.txt
index b9a2aff3ab3..48dc4929f70 100644
--- a/ydb/library/yql/core/file_storage/CMakeLists.txt
+++ b/ydb/library/yql/core/file_storage/CMakeLists.txt
@@ -20,20 +20,20 @@ target_link_libraries(yql-core-file_storage PUBLIC
cpp-digest-md5
cpp-logger-global
cpp-threading-future
- cpp-regex-pcre
cpp-protobuf-util
library-cpp-uri
core-file_storage-proto
core-file_storage-defs
core-file_storage-download
+ core-file_storage-http_download
library-yql-utils
yql-utils-log
yql-utils-fetch
)
target_sources(yql-core-file_storage PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage_decorator.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/sized_cache.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/storage.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/url_mapper.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/url_meta.cpp
)
diff --git a/ydb/library/yql/core/file_storage/defs/downloader.h b/ydb/library/yql/core/file_storage/defs/downloader.h
index fe74a1c9708..c7227f17ba0 100644
--- a/ydb/library/yql/core/file_storage/defs/downloader.h
+++ b/ydb/library/yql/core/file_storage/defs/downloader.h
@@ -13,7 +13,7 @@ namespace NYql::NFS {
struct IDownloader : public TThrRefBase {
virtual bool Accept(const THttpURL& url) = 0;
- virtual std::tuple<TDataProvider, TString, TString> Download(const THttpURL& url, const TString& oauthToken, const TString& etag, const TString& lastModified) = 0;
+ virtual std::tuple<TDataProvider, TString, TString> Download(const THttpURL& url, const TString& token, const TString& etag, const TString& lastModified) = 0;
};
using IDownloaderPtr = TIntrusivePtr<IDownloader>;
diff --git a/ydb/library/yql/core/file_storage/file_storage.cpp b/ydb/library/yql/core/file_storage/file_storage.cpp
index a507ce9f6e8..29518b39da6 100644
--- a/ydb/library/yql/core/file_storage/file_storage.cpp
+++ b/ydb/library/yql/core/file_storage/file_storage.cpp
@@ -1,10 +1,11 @@
#include "file_storage.h"
+#include "file_storage_decorator.h"
#include "storage.h"
-#include "url_mapper.h"
#include "url_meta.h"
#include <ydb/library/yql/core/file_storage/proto/file_storage.pb.h>
#include <ydb/library/yql/core/file_storage/download/download_stream.h>
+#include <ydb/library/yql/core/file_storage/http_download/http_download.h>
#include <ydb/library/yql/core/file_storage/defs/provider.h>
#include <ydb/library/yql/utils/fetch/fetch.h>
@@ -40,38 +41,18 @@ namespace NYql {
class TFileStorageImpl: public IFileStorage {
public:
- explicit TFileStorageImpl(const TFileStorageConfig& params)
+ explicit TFileStorageImpl(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders)
: Storage(params.GetMaxFiles(), ui64(params.GetMaxSizeMb()) << 20ull, params.GetPath())
, Config(params)
- , QueueStarted(0)
{
- try {
- for (const auto& sc : params.GetCustomSchemes()) {
- Mapper.AddMapping(sc.GetPattern(), sc.GetTargetUrl());
- }
- } catch (const yexception& e) {
- ythrow yexception() << "FileStorage: " << e.what();
- }
-
- auto numThreads = params.GetThreads();
- if (1 == numThreads) {
- MtpQueue.Reset(new TFakeThreadPool());
- } else {
- MtpQueue.Reset(new TSimpleThreadPool(TThreadPoolParams{"FileStorage"}));
- }
-
- // do not call MtpQueue->Start here as we have to do it _after_ fork
+ Downloaders.push_back(MakeHttpDownloader(params));
+ Downloaders.insert(Downloaders.begin(), downloaders.begin(), downloaders.end());
}
~TFileStorageImpl() {
- MtpQueue->Stop();
}
- void AddDownloader(NFS::IDownloaderPtr downloader) override {
- Downloaders.push_back(std::move(downloader));
- }
-
- TFileLinkPtr PutFile(const TString& file, const TString& outFileName = {}) override {
+ TFileLinkPtr PutFile(const TString& file, const TString& outFileName = {}) final {
YQL_LOG(INFO) << "PutFile to cache: " << file;
const auto md5 = MD5::File(file);
const TString storageFileName = md5 + ".file";
@@ -92,7 +73,7 @@ public:
});
}
- TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5 = {}) override {
+ TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5 = {}) final {
YQL_LOG(INFO) << "PutFileStripped to cache: " << file;
if (originalMd5.empty()) {
YQL_LOG(WARN) << "Empty md5 for: " << file;
@@ -133,7 +114,7 @@ public:
return result;
}
- TFileLinkPtr PutInline(const TString& data) override {
+ TFileLinkPtr PutInline(const TString& data) final {
const auto md5 = MD5::Calc(data);
const TString storageFileName = md5 + ".file";
YQL_LOG(INFO) << "PutInline to cache. md5=" << md5;
@@ -155,85 +136,59 @@ public:
});
}
- TFileLinkPtr PutUrl(const TString& urlStr, const TString& oauthToken) override {
+ TFileLinkPtr PutUrl(const TString& urlStr, const TString& token) final {
try {
- TString convertedUrl;
- if (!Mapper.MapUrl(urlStr, convertedUrl)) {
- convertedUrl = urlStr;
- }
-
- YQL_LOG(INFO) << "PutUrl to cache: " << convertedUrl;
- THttpURL url = ParseURL(convertedUrl);
+ YQL_LOG(INFO) << "PutUrl to cache: " << urlStr;
+ THttpURL url = ParseURL(urlStr);
for (const auto& d: Downloaders) {
if (d->Accept(url)) {
- return PutUrl(url, oauthToken, d);
+ return PutUrl(url, token, d);
}
}
- ythrow yexception() << "Unsupported url: " << convertedUrl;
+ ythrow yexception() << "Unsupported url: " << urlStr;
} catch (const std::exception& e) {
- YQL_LOG(ERROR) << "Failed to download file by URL \"" << urlStr << "\", details: " << e.what();
- YQL_LOG_CTX_THROW yexception() << "FileStorage: Failed to download file by URL \"" << urlStr << "\", details: " << e.what();
+ const TString msg = TStringBuilder() << "FileStorage: Failed to download file by URL \"" << urlStr << "\", details: " << e.what();
+ YQL_LOG(ERROR) << msg;
+ YQL_LOG_CTX_THROW yexception() << msg;
}
}
- NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName = {}) override {
- StartQueueOnce();
- auto logCtx = NLog::CurrentLogContextPath();
- return NThreading::Async([=]() {
- YQL_LOG_CTX_ROOT_SCOPE(logCtx);
- return this->PutFile(file, outFileName);
- }, *MtpQueue);
+ NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& /*file*/, const TString& /*outFileName*/) final {
+ ythrow yexception() << "Async method is not implemeted";
}
- NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) override {
- StartQueueOnce();
- auto logCtx = NLog::CurrentLogContextPath();
- return NThreading::Async([=]() {
- YQL_LOG_CTX_ROOT_SCOPE(logCtx);
- return this->PutInline(data);
- }, *MtpQueue);
+ NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& /*data*/) final {
+ ythrow yexception() << "Async method is not implemeted";
}
- NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& oauthToken) override {
- StartQueueOnce();
- auto logCtx = NLog::CurrentLogContextPath();
- return NThreading::Async([=]() {
- YQL_LOG_CTX_ROOT_SCOPE(logCtx);
- return this->PutUrl(url, oauthToken);
- }, *MtpQueue);
+ NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& /*url*/, const TString& /*token*/) final {
+ ythrow yexception() << "Async method is not implemeted";
}
- TFsPath GetRoot() const override {
+ TFsPath GetRoot() const final {
return Storage.GetRoot();
}
- TFsPath GetTemp() const override {
+ TFsPath GetTemp() const final {
return Storage.GetTemp();
}
- const TFileStorageConfig& GetConfig() const override {
+ const TFileStorageConfig& GetConfig() const final {
return Config;
}
private:
- void StartQueueOnce() {
- // we shall call Start only once
- if (AtomicTryLock(&QueueStarted)) {
- MtpQueue->Start(Config.GetThreads());
- }
- }
-
- TFileLinkPtr PutUrl(const THttpURL& url, const TString& oauthToken, const NFS::IDownloaderPtr& downloader) {
+ TFileLinkPtr PutUrl(const THttpURL& url, const TString& token, const NFS::IDownloaderPtr& downloader) {
return WithRetry<TDownloadError>(Config.GetRetryCount(), [&, this]() {
- return this->DoPutUrl(url, oauthToken, downloader);
+ return this->DoPutUrl(url, token, downloader);
}, [&](const auto& e, int attempt, int attemptCount) {
YQL_LOG(WARN) << "Error while downloading url " << url.PrintS() << ", attempt " << attempt << "/" << attemptCount << ", details: " << e.what();
Sleep(TDuration::MilliSeconds(Config.GetRetryDelayMs()));
});
}
- TFileLinkPtr DoPutUrl(const THttpURL& url, const TString& oauthToken, const NFS::IDownloaderPtr& downloader) {
+ TFileLinkPtr DoPutUrl(const THttpURL& url, const TString& token, const NFS::IDownloaderPtr& downloader) {
const auto urlMetaFile = BuildUrlMetaFileName(url);
auto lock = MultiResourceLock.Acquire(urlMetaFile); // let's use meta file as lock name
@@ -256,7 +211,7 @@ private:
NFS::TDataProvider puller;
TString etag;
TString lastModified;
- std::tie(puller, etag, lastModified) = downloader->Download(url, oauthToken, urlMeta.ETag, urlMeta.LastModified);
+ std::tie(puller, etag, lastModified) = downloader->Download(url, token, urlMeta.ETag, urlMeta.LastModified);
if (!puller) {
Y_ENSURE(oldContentLink); // should not fire
return oldContentLink;
@@ -305,16 +260,71 @@ private:
TStorage Storage;
const TFileStorageConfig Config;
std::vector<NFS::IDownloaderPtr> Downloaders;
- TUrlMapper Mapper;
+ TMultiResourceLock MultiResourceLock;
+};
+
+class TFileStorageWithAsync: public TFileStorageDecorator {
+public:
+ TFileStorageWithAsync(TFileStoragePtr fs)
+ : TFileStorageDecorator(std::move(fs))
+ , QueueStarted(0)
+ {
+ auto numThreads = Inner_->GetConfig().GetThreads();
+ if (1 == numThreads) {
+ MtpQueue.Reset(new TFakeThreadPool());
+ } else {
+ MtpQueue.Reset(new TSimpleThreadPool(TThreadPoolParams{"FileStorage"}));
+ }
+
+ // do not call MtpQueue->Start here as we have to do it _after_ fork
+ }
+
+ ~TFileStorageWithAsync() {
+ MtpQueue->Stop();
+ }
+
+ NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName) final {
+ return DoAsync([file, outFileName](const TFileStoragePtr& fs) {
+ return fs->PutFile(file, outFileName);
+ });
+ }
+ NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) final {
+ return DoAsync([data](const TFileStoragePtr& fs) {
+ return fs->PutInline(data);
+ });
+ }
+ NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& token) final {
+ return DoAsync([url, token](const TFileStoragePtr& fs) {
+ return fs->PutUrl(url, token);
+ });
+ }
+
+private:
+ NThreading::TFuture<TFileLinkPtr> DoAsync(std::function<TFileLinkPtr(const TFileStoragePtr&)> action) {
+ if (AtomicTryLock(&QueueStarted)) {
+ MtpQueue->Start(Inner_->GetConfig().GetThreads());
+ }
+ auto logCtx = NLog::CurrentLogContextPath();
+ return NThreading::Async([logCtx, fs = Inner_, action]() {
+ YQL_LOG_CTX_ROOT_SCOPE(logCtx);
+ return action(fs);
+ }, *MtpQueue);
+ }
+
+private:
TAtomic QueueStarted;
THolder<IThreadPool> MtpQueue;
- TMultiResourceLock MultiResourceLock;
};
-TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params) {
+
+TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders) {
Y_ENSURE(0 != params.GetMaxFiles(), "FileStorage: MaxFiles must be greater than 0");
Y_ENSURE(0 != params.GetMaxSizeMb(), "FileStorage: MaxSizeMb must be greater than 0");
- return new TFileStorageImpl(params);
+ return new TFileStorageImpl(params, downloaders);
+}
+
+TFileStoragePtr WithAsync(TFileStoragePtr fs) {
+ return new TFileStorageWithAsync(std::move(fs));
}
void LoadFsConfigFromFile(TStringBuf path, TFileStorageConfig& params) {
diff --git a/ydb/library/yql/core/file_storage/file_storage.h b/ydb/library/yql/core/file_storage/file_storage.h
index ab5fd5c03e8..b95ccabaaff 100644
--- a/ydb/library/yql/core/file_storage/file_storage.h
+++ b/ydb/library/yql/core/file_storage/file_storage.h
@@ -11,7 +11,7 @@
#include <util/generic/ptr.h>
#include <util/generic/string.h>
-#include <tuple>
+#include <vector>
namespace NYql {
@@ -19,15 +19,14 @@ class TFileStorageConfig;
struct IFileStorage: public TThrRefBase {
virtual ~IFileStorage() = default;
- virtual void AddDownloader(NYql::NFS::IDownloaderPtr downloader) = 0;
virtual TFileLinkPtr PutFile(const TString& file, const TString& outFileName = {}) = 0;
virtual TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5 = {}) = 0;
virtual TFileLinkPtr PutInline(const TString& data) = 0;
- virtual TFileLinkPtr PutUrl(const TString& url, const TString& oauthToken) = 0;
+ virtual TFileLinkPtr PutUrl(const TString& url, const TString& token) = 0;
// async versions
virtual NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName = {}) = 0;
virtual NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) = 0;
- virtual NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& oauthToken) = 0;
+ virtual NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& token) = 0;
virtual TFsPath GetRoot() const = 0;
virtual TFsPath GetTemp() const = 0;
@@ -37,7 +36,13 @@ struct IFileStorage: public TThrRefBase {
using TFileStoragePtr = TIntrusivePtr<IFileStorage>;
// Will use auto-cleaned temporary directory if storagePath is empty
-TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params);
+TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders = {});
+
+TFileStoragePtr WithAsync(TFileStoragePtr fs);
+
+inline TFileStoragePtr CreateAsyncFileStorage(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders = {}) {
+ return WithAsync(CreateFileStorage(params, downloaders));
+}
void LoadFsConfigFromFile(TStringBuf path, TFileStorageConfig& params);
void LoadFsConfigFromResource(TStringBuf path, TFileStorageConfig& params);
diff --git a/ydb/library/yql/core/file_storage/file_storage_decorator.cpp b/ydb/library/yql/core/file_storage/file_storage_decorator.cpp
new file mode 100644
index 00000000000..ec9e5bfac88
--- /dev/null
+++ b/ydb/library/yql/core/file_storage/file_storage_decorator.cpp
@@ -0,0 +1,42 @@
+#include "file_storage_decorator.h"
+
+
+namespace NYql {
+
+TFileStorageDecorator::TFileStorageDecorator(TFileStoragePtr fs)
+ : Inner_(std::move(fs))
+{
+}
+
+TFileLinkPtr TFileStorageDecorator::PutFile(const TString& file, const TString& outFileName) {
+ return Inner_->PutFile(file, outFileName);
+}
+TFileLinkPtr TFileStorageDecorator::PutFileStripped(const TString& file, const TString& originalMd5) {
+ return Inner_->PutFileStripped(file, originalMd5);
+}
+TFileLinkPtr TFileStorageDecorator::PutInline(const TString& data) {
+ return Inner_->PutInline(data);
+}
+TFileLinkPtr TFileStorageDecorator::PutUrl(const TString& url, const TString& token) {
+ return Inner_->PutUrl(url, token);
+}
+NThreading::TFuture<TFileLinkPtr> TFileStorageDecorator::PutFileAsync(const TString& file, const TString& outFileName) {
+ return Inner_->PutFileAsync(file, outFileName);
+}
+NThreading::TFuture<TFileLinkPtr> TFileStorageDecorator::PutInlineAsync(const TString& data) {
+ return Inner_->PutInlineAsync(data);
+}
+NThreading::TFuture<TFileLinkPtr> TFileStorageDecorator::PutUrlAsync(const TString& url, const TString& token) {
+ return Inner_->PutUrlAsync(url, token);
+}
+TFsPath TFileStorageDecorator::GetRoot() const {
+ return Inner_->GetRoot();
+}
+TFsPath TFileStorageDecorator::GetTemp() const {
+ return Inner_->GetTemp();
+}
+const TFileStorageConfig& TFileStorageDecorator::GetConfig() const {
+ return Inner_->GetConfig();
+}
+
+} // NYql
diff --git a/ydb/library/yql/core/file_storage/file_storage_decorator.h b/ydb/library/yql/core/file_storage/file_storage_decorator.h
new file mode 100644
index 00000000000..9d6f2b8f3e4
--- /dev/null
+++ b/ydb/library/yql/core/file_storage/file_storage_decorator.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "file_storage.h"
+
+namespace NYql {
+
+class TFileStorageDecorator: public IFileStorage {
+public:
+ TFileStorageDecorator(TFileStoragePtr fs);
+
+ TFileLinkPtr PutFile(const TString& file, const TString& outFileName) override;
+ TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5) override;
+ TFileLinkPtr PutInline(const TString& data) override;
+ TFileLinkPtr PutUrl(const TString& url, const TString& token) override;
+ NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName) override;
+ NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) override;
+ NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& token) override;
+ TFsPath GetRoot() const override;
+ TFsPath GetTemp() const override;
+ const TFileStorageConfig& GetConfig() const override;
+
+protected:
+ TFileStoragePtr Inner_;
+};
+
+} // NYql
diff --git a/ydb/library/yql/core/file_storage/file_storage_ut.cpp b/ydb/library/yql/core/file_storage/file_storage_ut.cpp
index 9e012489141..d5a5d7da8c8 100644
--- a/ydb/library/yql/core/file_storage/file_storage_ut.cpp
+++ b/ydb/library/yql/core/file_storage/file_storage_ut.cpp
@@ -1,6 +1,6 @@
#include "file_storage.h"
-#include <ydb/library/yql/core/file_storage/ut/test_http_server.h>
+#include <ydb/library/yql/utils/test_http_server/test_http_server.h>
#include <ydb/library/yql/core/file_storage/proto/file_storage.pb.h>
#include <ydb/library/yql/core/file_storage/http_download/http_download.h>
#include <ydb/library/yql/core/file_storage/http_download/proto/http_download.pb.h>
@@ -25,16 +25,14 @@ Y_UNIT_TEST_SUITE(TFileStorageTests) {
return TIFStream(path).ReadAll();
}
- static TFileStoragePtr CreateTestFS(TFileStorageConfig params = {}, const THttpDownloaderConfig* httpCfg = nullptr, const std::vector<TString>& extraPatterns = {}) {
+ static TFileStoragePtr CreateTestFS(TFileStorageConfig params = {}, const THttpDownloaderConfig* httpCfg = nullptr) {
if (httpCfg) {
TStringStream strCfg;
SerializeToTextFormat(*httpCfg, strCfg);
(*params.MutableDownloaderConfig())["http"] = strCfg.Str();
}
- TFileStoragePtr fs = CreateFileStorage(params);
- fs->AddDownloader(MakeHttpDownloader(false, params, extraPatterns));
- return fs;
+ return CreateFileStorage(params);
}
static std::unique_ptr<TTestHttpServer> CreateTestHttpServer() {
@@ -475,51 +473,6 @@ Y_UNIT_TEST_SUITE(TFileStorageTests) {
UNIT_ASSERT_VALUES_EQUAL(3, downloadCount);
}
- Y_UNIT_TEST(AllowedUrls) {
- auto server = CreateTestHttpServer();
-
- TString currentContent = "ABC";
- server->SetRequestHandler([&](auto&) {
- return TTestHttpServer::TReply::Ok(currentContent);
- });
-
- auto url = server->GetUrl();
-
- {
- // not in whitelist
- THttpDownloaderConfig httpCfg;
- httpCfg.AddAllowedUrlPatterns("^XXXX$");
- TFileStorageConfig params;
- TFileStoragePtr fs = CreateTestFS(params, &httpCfg);
-
- UNIT_ASSERT_EXCEPTION_CONTAINS(fs->PutUrl(url, {}), std::exception, "It is not allowed to download url http://localhost:");
- }
-
- {
- // have in whitelist
- THttpDownloaderConfig httpCfg;
- httpCfg.SetSocketTimeoutMs(4000);
- httpCfg.AddAllowedUrlPatterns("^http://localhost:");
- TFileStorageConfig params;
- TFileStoragePtr fs = CreateTestFS(params, &httpCfg);
-
- auto link = fs->PutUrl(url, {});
- UNIT_ASSERT_VALUES_EQUAL(currentContent, ReadFileContent(link->GetPath()));
- }
-
- {
- // have eaxtra url in whitelist
- THttpDownloaderConfig httpCfg;
- httpCfg.SetSocketTimeoutMs(4000);
- httpCfg.AddAllowedUrlPatterns("^XXXX$");
- TFileStorageConfig params;
- TFileStoragePtr fs = CreateTestFS(params, &httpCfg, std::vector{TString{"^http://localhost:"}});
-
- auto link = fs->PutUrl(url, {});
- UNIT_ASSERT_VALUES_EQUAL(currentContent, ReadFileContent(link->GetPath()));
- }
- }
-
Y_UNIT_TEST(SocketTimeout) {
auto server = CreateTestHttpServer();
diff --git a/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt b/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt
index eeb6a664522..73addbbd721 100644
--- a/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt
+++ b/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt
@@ -19,11 +19,9 @@ target_link_libraries(core-file_storage-http_download PUBLIC
yql-utils-fetch
yql-utils-log
library-yql-utils
- cpp-regex-pcre
cpp-digest-md5
cpp-http-misc
)
target_sources(core-file_storage-http_download PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/http_download/http_download.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp
)
diff --git a/ydb/library/yql/core/file_storage/http_download/http_download.cpp b/ydb/library/yql/core/file_storage/http_download/http_download.cpp
index 597103c19cb..c7654554125 100644
--- a/ydb/library/yql/core/file_storage/http_download/http_download.cpp
+++ b/ydb/library/yql/core/file_storage/http_download/http_download.cpp
@@ -1,5 +1,4 @@
#include "http_download.h"
-#include "pattern_group.h"
#include <ydb/library/yql/core/file_storage/proto/file_storage.pb.h>
#include <ydb/library/yql/core/file_storage/http_download/proto/http_download.pb.h>
@@ -9,7 +8,6 @@
#include <ydb/library/yql/utils/fetch/fetch.h>
#include <ydb/library/yql/utils/log/log.h>
#include <ydb/library/yql/utils/log/context.h>
-#include <ydb/library/yql/utils/multi_resource_lock.h>
#include <ydb/library/yql/utils/md5_stream.h>
#include <ydb/library/yql/utils/retry.h>
#include <ydb/library/yql/utils/yql_panic.h>
@@ -27,22 +25,13 @@ namespace NYql {
class THttpDownloader: public TDownloadConfig<THttpDownloader, THttpDownloaderConfig>, public NYql::NFS::IDownloader {
public:
- THttpDownloader(bool restictedUser, const TFileStorageConfig& config, const std::vector<TString>& extraAllowedUrls)
- : RestictedUser(restictedUser)
- {
+ THttpDownloader(const TFileStorageConfig& config) {
Configure(config, "http");
- for (auto p: extraAllowedUrls) {
- AllowedUrls.Add(p);
- }
}
~THttpDownloader() = default;
void DoConfigure(const THttpDownloaderConfig& cfg) {
SocketTimeoutMs = cfg.GetSocketTimeoutMs();
-
- for (const auto& p : RestictedUser ? cfg.GetExternalAllowedUrlPatterns() : cfg.GetAllowedUrlPatterns()) {
- AllowedUrls.Add(p);
- }
}
bool Accept(const THttpURL& url) final {
@@ -56,15 +45,8 @@ public:
return false;
}
- std::tuple<NYql::NFS::TDataProvider, TString, TString> Download(const THttpURL& url, const TString& oauthToken, const TString& oldEtag, const TString& oldLastModified) final {
- if (!AllowedUrls.IsEmpty()) {
- if (auto strUrl = url.PrintS(); !AllowedUrls.Match(strUrl)) {
- YQL_LOG(WARN) << "FileStorage: url " << strUrl << " is not in whitelist, reject downloading";
- throw yexception() << "It is not allowed to download url " << strUrl;
- }
- }
-
- TFetchResultPtr fr1 = FetchWithETagAndLastModified(url, oauthToken, oldEtag, oldLastModified, SocketTimeoutMs);
+ std::tuple<NYql::NFS::TDataProvider, TString, TString> Download(const THttpURL& url, const TString& token, const TString& oldEtag, const TString& oldLastModified) final {
+ TFetchResultPtr fr1 = FetchWithETagAndLastModified(url, token, oldEtag, oldLastModified, SocketTimeoutMs);
switch (fr1->GetRetCode()) {
case HTTP_NOT_MODIFIED:
return std::make_tuple(NYql::NFS::TDataProvider{}, TString{}, TString{});
@@ -84,11 +66,11 @@ public:
}
private:
- static TFetchResultPtr FetchWithETagAndLastModified(const THttpURL& url, const TString& oauthToken, const TString& oldEtag, const TString& oldLastModified, ui32 socketTimeoutMs) {
+ static TFetchResultPtr FetchWithETagAndLastModified(const THttpURL& url, const TString& token, const TString& oldEtag, const TString& oldLastModified, ui32 socketTimeoutMs) {
// more details about ETag and ModifiedSince: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
THttpHeaders headers;
- if (!oauthToken.empty()) {
- headers.AddHeader(THttpInputHeader("Authorization", "OAuth " + oauthToken));
+ if (!token.empty()) {
+ headers.AddHeader(THttpInputHeader("Authorization", "OAuth " + token));
}
// ETag has priority over modification time
@@ -158,13 +140,11 @@ private:
}
private:
- const bool RestictedUser;
ui32 SocketTimeoutMs = 300000;
- TPatternGroup AllowedUrls;
};
-NYql::NFS::IDownloaderPtr MakeHttpDownloader(bool restictedUser, const TFileStorageConfig& config, const std::vector<TString>& extraAllowedUrls) {
- return MakeIntrusive<THttpDownloader>(restictedUser, config, extraAllowedUrls);
+NYql::NFS::IDownloaderPtr MakeHttpDownloader(const TFileStorageConfig& config) {
+ return MakeIntrusive<THttpDownloader>(config);
}
} // NYql
diff --git a/ydb/library/yql/core/file_storage/http_download/http_download.h b/ydb/library/yql/core/file_storage/http_download/http_download.h
index af39d7976a8..ee0ff964d04 100644
--- a/ydb/library/yql/core/file_storage/http_download/http_download.h
+++ b/ydb/library/yql/core/file_storage/http_download/http_download.h
@@ -8,6 +8,6 @@ namespace NYql {
class TFileStorageConfig;
-NYql::NFS::IDownloaderPtr MakeHttpDownloader(bool restictedUser, const TFileStorageConfig& config, const std::vector<TString>& extraAllowedUrls);
+NYql::NFS::IDownloaderPtr MakeHttpDownloader(const TFileStorageConfig& config);
} // NYql
diff --git a/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp b/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp
deleted file mode 100644
index 689c97d322a..00000000000
--- a/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include "pattern_group.h"
-
-namespace NYql {
-
-TPatternGroup::TPatternGroup(const TVector<TString>& patterns) {
- for (auto& p : patterns) {
- Add(p);
- }
-}
-
-void TPatternGroup::Add(const TString& pattern) {
- auto it = CompiledPatterns.find(pattern);
- if (it != CompiledPatterns.end()) {
- return;
- }
-
- CompiledPatterns.emplace(pattern, TRegExMatch(pattern));
-}
-
-bool TPatternGroup::IsEmpty() const {
- return CompiledPatterns.empty();
-}
-
-bool TPatternGroup::Match(const TString& s) const {
- for (auto& p : CompiledPatterns) {
- if (p.second.Match(s.c_str())) {
- return true;
- }
- }
-
- return false;
-}
-
-}
diff --git a/ydb/library/yql/core/file_storage/http_download/pattern_group.h b/ydb/library/yql/core/file_storage/http_download/pattern_group.h
deleted file mode 100644
index 53d0d64a7fa..00000000000
--- a/ydb/library/yql/core/file_storage/http_download/pattern_group.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#pragma once
-
-#include <library/cpp/regex/pcre/regexp.h>
-#include <util/generic/map.h>
-#include <util/generic/vector.h>
-
-namespace NYql {
-
-class TPatternGroup {
-public:
- explicit TPatternGroup(const TVector<TString>& patterns = {});
- void Add(const TString& pattern);
- bool IsEmpty() const;
- bool Match(const TString& s) const;
-
-private:
- TMap<TString, TRegExMatch> CompiledPatterns;
-};
-
-}
diff --git a/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto b/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto
index 49e2214f1dc..ead8a463e69 100644
--- a/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto
+++ b/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto
@@ -1,7 +1,9 @@
package NYql;
message THttpDownloaderConfig {
+ // TODO: remove
repeated string AllowedUrlPatterns = 1; // Whitelist of url regexps; disabled if empty
+ // TODO: remove
repeated string ExternalAllowedUrlPatterns = 2; // Whitelist of url regexps for external users; disabled if empty
optional uint32 SocketTimeoutMs = 3 [default = 300000];
}
diff --git a/ydb/library/yql/core/file_storage/proto/file_storage.proto b/ydb/library/yql/core/file_storage/proto/file_storage.proto
index 8a4e42decf6..c76b0ce01ec 100644
--- a/ydb/library/yql/core/file_storage/proto/file_storage.proto
+++ b/ydb/library/yql/core/file_storage/proto/file_storage.proto
@@ -1,6 +1,6 @@
package NYql;
-message TSchemeTranslate {
+message TSchemeTranslate { // TODO: remove
required string Pattern = 1; // regexp to match
required string TargetUrl = 2; // replacement string for target URL
}
@@ -9,7 +9,7 @@ message TFileStorageConfig {
optional string Path = 1; // Path to file storage. An auto-cleaned temp directory is used for empty value
optional uint32 MaxFiles = 2 [default = 1000]; // Maximum number of files in the storage
optional uint32 MaxSizeMb = 3 [default = 100]; // Maximum total size of all files in the storage
- repeated TSchemeTranslate CustomSchemes = 4;
+ repeated TSchemeTranslate CustomSchemes = 4; // TODO: remove
optional uint32 Threads = 5 [default = 1]; // Number of download threads for async downloading
optional uint32 RetryCount = 6 [default = 0]; // Number of additional attempts to download file
optional uint32 RetryDelayMs = 7 [default = 1000]; // Delay in ms between attempts to download file
diff --git a/ydb/library/yql/core/file_storage/url_mapper.cpp b/ydb/library/yql/core/file_storage/url_mapper.cpp
deleted file mode 100644
index 0c646ed0d68..00000000000
--- a/ydb/library/yql/core/file_storage/url_mapper.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "url_mapper.h"
-
-namespace NYql {
-
-void TUrlMapper::AddMapping(const TString& pattern, const TString& targetUrl) {
- CustomSchemes.push_back(TCustomScheme(pattern, targetUrl));
-}
-
-bool TUrlMapper::MapUrl(const TString& url, TString& mappedUrl) const {
- for (const auto& sc : CustomSchemes) {
- if (sc.Pattern.Match(url.data())) {
- mappedUrl = TRegExSubst(sc.TargetUrlSubst).Replace(url.data());
- return true;
- }
- }
- return false;
-}
-
-}
diff --git a/ydb/library/yql/core/file_storage/url_mapper.h b/ydb/library/yql/core/file_storage/url_mapper.h
deleted file mode 100644
index f6438ecca3a..00000000000
--- a/ydb/library/yql/core/file_storage/url_mapper.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-#include <library/cpp/regex/pcre/regexp.h>
-#include <util/generic/vector.h>
-
-namespace NYql {
-
-class TUrlMapper {
-public:
- void AddMapping(const TString& pattern, const TString& targetUrl);
- bool MapUrl(const TString& url, TString& mappedUrl) const;
-
-private:
- struct TCustomScheme {
- TCustomScheme(const TString& pattern, const TString& url)
- : Pattern(pattern)
- , TargetUrlHolder(url)
- , TargetUrlSubst(pattern.data()) {
- if (0 == TargetUrlSubst.ParseReplacement(TargetUrlHolder.data())) {
- ythrow yexception() << "Bad url replacement: " << TargetUrlHolder;
- }
- }
- TRegExMatch Pattern;
- TString TargetUrlHolder;
- TRegExSubst TargetUrlSubst;
- };
-
-private:
- TVector<TCustomScheme> CustomSchemes;
-};
-
-}
diff --git a/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt b/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt
index f949b8fc405..18cab17ac4c 100644
--- a/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt
+++ b/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt
@@ -19,8 +19,8 @@ target_link_libraries(ydb-library-yql-core-file_storage-ut PUBLIC
yql-core-file_storage
cpp-http-server
cpp-threading-future
- core-file_storage-http_download
cpp-deprecated-atomic
+ yql-utils-test_http_server
)
target_link_options(ydb-library-yql-core-file_storage-ut PRIVATE
-Wl,-no_deduplicate
@@ -32,7 +32,6 @@ target_sources(ydb-library-yql-core-file_storage-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/sized_cache_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/storage_ut.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/ut/test_http_server.cpp
)
add_test(
NAME
diff --git a/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt b/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt
index 3c6b4caa57a..fcf6e42a526 100644
--- a/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt
+++ b/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt
@@ -21,8 +21,8 @@ target_link_libraries(ydb-library-yql-core-file_storage-ut PUBLIC
yql-core-file_storage
cpp-http-server
cpp-threading-future
- core-file_storage-http_download
cpp-deprecated-atomic
+ yql-utils-test_http_server
)
target_link_options(ydb-library-yql-core-file_storage-ut PRIVATE
-ldl
@@ -38,7 +38,6 @@ target_sources(ydb-library-yql-core-file_storage-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/sized_cache_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/storage_ut.cpp
- ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/ut/test_http_server.cpp
)
add_test(
NAME
diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto
index 63ed448a0a1..b43ecb0b91e 100644
--- a/ydb/library/yql/providers/common/proto/gateways_config.proto
+++ b/ydb/library/yql/providers/common/proto/gateways_config.proto
@@ -394,7 +394,21 @@ message TSolomonGatewayConfig {
}
message TFileStorageAdditionalConfig {
- repeated string AllowedUrlPatterns = 1;
+ message TUrlPattern {
+ required string Pattern = 1;
+ optional string Alias = 2;
+ }
+
+ message TSchemeTranslate {
+ required string Pattern = 1; // regexp to match
+ required string TargetUrl = 2; // replacement string for target URL
+ }
+
+ repeated string AllowedUrlPatterns = 1; // Depricated
+
+ repeated TUrlPattern AllowedUrls = 2; // Whitelist of url regexps; disabled if empty
+ repeated TUrlPattern ExternalAllowedUrls = 3; // Whitelist of url regexps for external users; disabled if empty
+ repeated TSchemeTranslate CustomSchemes = 4;
}
/////////////////////////////// Postgresql /////////////////////////////
diff --git a/ydb/library/yql/utils/CMakeLists.txt b/ydb/library/yql/utils/CMakeLists.txt
index 39346521619..385bf233ac2 100644
--- a/ydb/library/yql/utils/CMakeLists.txt
+++ b/ydb/library/yql/utils/CMakeLists.txt
@@ -12,6 +12,7 @@ add_subdirectory(backtrace)
add_subdirectory(failure_injector)
add_subdirectory(fetch)
add_subdirectory(log)
+add_subdirectory(test_http_server)
add_subdirectory(threading)
add_subdirectory(ut)
diff --git a/ydb/library/yql/utils/test_http_server/CMakeLists.txt b/ydb/library/yql/utils/test_http_server/CMakeLists.txt
new file mode 100644
index 00000000000..6bec57c9ad9
--- /dev/null
+++ b/ydb/library/yql/utils/test_http_server/CMakeLists.txt
@@ -0,0 +1,19 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(yql-utils-test_http_server)
+target_link_libraries(yql-utils-test_http_server PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-http-server
+ cpp-http-misc
+)
+target_sources(yql-utils-test_http_server PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/test_http_server/test_http_server.cpp
+)
diff --git a/ydb/library/yql/core/file_storage/ut/test_http_server.cpp b/ydb/library/yql/utils/test_http_server/test_http_server.cpp
index cf5b2536043..5d55230c410 100644
--- a/ydb/library/yql/core/file_storage/ut/test_http_server.cpp
+++ b/ydb/library/yql/utils/test_http_server/test_http_server.cpp
@@ -2,9 +2,8 @@
#include <library/cpp/http/misc/httpcodes.h>
#include <library/cpp/http/server/http_ex.h>
-#include <library/cpp/testing/unittest/registar.h>
-#include <library/cpp/deprecated/atomic/atomic_ops.h>
+#include <util/generic/yexception.h>
namespace NYql {
@@ -103,7 +102,7 @@ public:
}
void Start() {
- UNIT_ASSERT(HttpServer_.Start());
+ Y_ENSURE(HttpServer_.Start());
}
void Stop() {
diff --git a/ydb/library/yql/core/file_storage/ut/test_http_server.h b/ydb/library/yql/utils/test_http_server/test_http_server.h
index 170b45ca62c..385cfaf9706 100644
--- a/ydb/library/yql/core/file_storage/ut/test_http_server.h
+++ b/ydb/library/yql/utils/test_http_server/test_http_server.h
@@ -1,11 +1,11 @@
#pragma once
+#include <library/cpp/http/misc/httpcodes.h>
+
#include <util/generic/maybe.h>
#include <util/generic/ptr.h>
#include <util/generic/string.h>
-#include <library/cpp/http/misc/httpcodes.h>
-#include <library/cpp/threading/future/future.h>
#include <functional>
namespace NYql {