diff options
author | udovichenko-r <rvu@ydb.tech> | 2022-10-17 10:18:12 +0300 |
---|---|---|
committer | udovichenko-r <rvu@ydb.tech> | 2022-10-17 10:18:12 +0300 |
commit | 99884e915f4d3edcb1588b61fa689659e1a8e1d0 (patch) | |
tree | 9994ea8a54d1b16ab4484ac77b80729754db53f6 | |
parent | bbe39d44d0036e8b67f8204f00658cf38d8bda65 (diff) | |
download | ydb-99884e915f4d3edcb1588b61fa689659e1a8e1d0.tar.gz |
[yql] Refactor file storage
23 files changed, 226 insertions, 284 deletions
diff --git a/ydb/library/yql/core/file_storage/CMakeLists.txt b/ydb/library/yql/core/file_storage/CMakeLists.txt index b9a2aff3ab3..48dc4929f70 100644 --- a/ydb/library/yql/core/file_storage/CMakeLists.txt +++ b/ydb/library/yql/core/file_storage/CMakeLists.txt @@ -20,20 +20,20 @@ target_link_libraries(yql-core-file_storage PUBLIC cpp-digest-md5 cpp-logger-global cpp-threading-future - cpp-regex-pcre cpp-protobuf-util library-cpp-uri core-file_storage-proto core-file_storage-defs core-file_storage-download + core-file_storage-http_download library-yql-utils yql-utils-log yql-utils-fetch ) target_sources(yql-core-file_storage PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage_decorator.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/sized_cache.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/storage.cpp - ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/url_mapper.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/url_meta.cpp ) diff --git a/ydb/library/yql/core/file_storage/defs/downloader.h b/ydb/library/yql/core/file_storage/defs/downloader.h index fe74a1c9708..c7227f17ba0 100644 --- a/ydb/library/yql/core/file_storage/defs/downloader.h +++ b/ydb/library/yql/core/file_storage/defs/downloader.h @@ -13,7 +13,7 @@ namespace NYql::NFS { struct IDownloader : public TThrRefBase { virtual bool Accept(const THttpURL& url) = 0; - virtual std::tuple<TDataProvider, TString, TString> Download(const THttpURL& url, const TString& oauthToken, const TString& etag, const TString& lastModified) = 0; + virtual std::tuple<TDataProvider, TString, TString> Download(const THttpURL& url, const TString& token, const TString& etag, const TString& lastModified) = 0; }; using IDownloaderPtr = TIntrusivePtr<IDownloader>; diff --git a/ydb/library/yql/core/file_storage/file_storage.cpp b/ydb/library/yql/core/file_storage/file_storage.cpp index a507ce9f6e8..29518b39da6 100644 --- a/ydb/library/yql/core/file_storage/file_storage.cpp +++ b/ydb/library/yql/core/file_storage/file_storage.cpp @@ -1,10 +1,11 @@ #include "file_storage.h" +#include "file_storage_decorator.h" #include "storage.h" -#include "url_mapper.h" #include "url_meta.h" #include <ydb/library/yql/core/file_storage/proto/file_storage.pb.h> #include <ydb/library/yql/core/file_storage/download/download_stream.h> +#include <ydb/library/yql/core/file_storage/http_download/http_download.h> #include <ydb/library/yql/core/file_storage/defs/provider.h> #include <ydb/library/yql/utils/fetch/fetch.h> @@ -40,38 +41,18 @@ namespace NYql { class TFileStorageImpl: public IFileStorage { public: - explicit TFileStorageImpl(const TFileStorageConfig& params) + explicit TFileStorageImpl(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders) : Storage(params.GetMaxFiles(), ui64(params.GetMaxSizeMb()) << 20ull, params.GetPath()) , Config(params) - , QueueStarted(0) { - try { - for (const auto& sc : params.GetCustomSchemes()) { - Mapper.AddMapping(sc.GetPattern(), sc.GetTargetUrl()); - } - } catch (const yexception& e) { - ythrow yexception() << "FileStorage: " << e.what(); - } - - auto numThreads = params.GetThreads(); - if (1 == numThreads) { - MtpQueue.Reset(new TFakeThreadPool()); - } else { - MtpQueue.Reset(new TSimpleThreadPool(TThreadPoolParams{"FileStorage"})); - } - - // do not call MtpQueue->Start here as we have to do it _after_ fork + Downloaders.push_back(MakeHttpDownloader(params)); + Downloaders.insert(Downloaders.begin(), downloaders.begin(), downloaders.end()); } ~TFileStorageImpl() { - MtpQueue->Stop(); } - void AddDownloader(NFS::IDownloaderPtr downloader) override { - Downloaders.push_back(std::move(downloader)); - } - - TFileLinkPtr PutFile(const TString& file, const TString& outFileName = {}) override { + TFileLinkPtr PutFile(const TString& file, const TString& outFileName = {}) final { YQL_LOG(INFO) << "PutFile to cache: " << file; const auto md5 = MD5::File(file); const TString storageFileName = md5 + ".file"; @@ -92,7 +73,7 @@ public: }); } - TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5 = {}) override { + TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5 = {}) final { YQL_LOG(INFO) << "PutFileStripped to cache: " << file; if (originalMd5.empty()) { YQL_LOG(WARN) << "Empty md5 for: " << file; @@ -133,7 +114,7 @@ public: return result; } - TFileLinkPtr PutInline(const TString& data) override { + TFileLinkPtr PutInline(const TString& data) final { const auto md5 = MD5::Calc(data); const TString storageFileName = md5 + ".file"; YQL_LOG(INFO) << "PutInline to cache. md5=" << md5; @@ -155,85 +136,59 @@ public: }); } - TFileLinkPtr PutUrl(const TString& urlStr, const TString& oauthToken) override { + TFileLinkPtr PutUrl(const TString& urlStr, const TString& token) final { try { - TString convertedUrl; - if (!Mapper.MapUrl(urlStr, convertedUrl)) { - convertedUrl = urlStr; - } - - YQL_LOG(INFO) << "PutUrl to cache: " << convertedUrl; - THttpURL url = ParseURL(convertedUrl); + YQL_LOG(INFO) << "PutUrl to cache: " << urlStr; + THttpURL url = ParseURL(urlStr); for (const auto& d: Downloaders) { if (d->Accept(url)) { - return PutUrl(url, oauthToken, d); + return PutUrl(url, token, d); } } - ythrow yexception() << "Unsupported url: " << convertedUrl; + ythrow yexception() << "Unsupported url: " << urlStr; } catch (const std::exception& e) { - YQL_LOG(ERROR) << "Failed to download file by URL \"" << urlStr << "\", details: " << e.what(); - YQL_LOG_CTX_THROW yexception() << "FileStorage: Failed to download file by URL \"" << urlStr << "\", details: " << e.what(); + const TString msg = TStringBuilder() << "FileStorage: Failed to download file by URL \"" << urlStr << "\", details: " << e.what(); + YQL_LOG(ERROR) << msg; + YQL_LOG_CTX_THROW yexception() << msg; } } - NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName = {}) override { - StartQueueOnce(); - auto logCtx = NLog::CurrentLogContextPath(); - return NThreading::Async([=]() { - YQL_LOG_CTX_ROOT_SCOPE(logCtx); - return this->PutFile(file, outFileName); - }, *MtpQueue); + NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& /*file*/, const TString& /*outFileName*/) final { + ythrow yexception() << "Async method is not implemeted"; } - NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) override { - StartQueueOnce(); - auto logCtx = NLog::CurrentLogContextPath(); - return NThreading::Async([=]() { - YQL_LOG_CTX_ROOT_SCOPE(logCtx); - return this->PutInline(data); - }, *MtpQueue); + NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& /*data*/) final { + ythrow yexception() << "Async method is not implemeted"; } - NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& oauthToken) override { - StartQueueOnce(); - auto logCtx = NLog::CurrentLogContextPath(); - return NThreading::Async([=]() { - YQL_LOG_CTX_ROOT_SCOPE(logCtx); - return this->PutUrl(url, oauthToken); - }, *MtpQueue); + NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& /*url*/, const TString& /*token*/) final { + ythrow yexception() << "Async method is not implemeted"; } - TFsPath GetRoot() const override { + TFsPath GetRoot() const final { return Storage.GetRoot(); } - TFsPath GetTemp() const override { + TFsPath GetTemp() const final { return Storage.GetTemp(); } - const TFileStorageConfig& GetConfig() const override { + const TFileStorageConfig& GetConfig() const final { return Config; } private: - void StartQueueOnce() { - // we shall call Start only once - if (AtomicTryLock(&QueueStarted)) { - MtpQueue->Start(Config.GetThreads()); - } - } - - TFileLinkPtr PutUrl(const THttpURL& url, const TString& oauthToken, const NFS::IDownloaderPtr& downloader) { + TFileLinkPtr PutUrl(const THttpURL& url, const TString& token, const NFS::IDownloaderPtr& downloader) { return WithRetry<TDownloadError>(Config.GetRetryCount(), [&, this]() { - return this->DoPutUrl(url, oauthToken, downloader); + return this->DoPutUrl(url, token, downloader); }, [&](const auto& e, int attempt, int attemptCount) { YQL_LOG(WARN) << "Error while downloading url " << url.PrintS() << ", attempt " << attempt << "/" << attemptCount << ", details: " << e.what(); Sleep(TDuration::MilliSeconds(Config.GetRetryDelayMs())); }); } - TFileLinkPtr DoPutUrl(const THttpURL& url, const TString& oauthToken, const NFS::IDownloaderPtr& downloader) { + TFileLinkPtr DoPutUrl(const THttpURL& url, const TString& token, const NFS::IDownloaderPtr& downloader) { const auto urlMetaFile = BuildUrlMetaFileName(url); auto lock = MultiResourceLock.Acquire(urlMetaFile); // let's use meta file as lock name @@ -256,7 +211,7 @@ private: NFS::TDataProvider puller; TString etag; TString lastModified; - std::tie(puller, etag, lastModified) = downloader->Download(url, oauthToken, urlMeta.ETag, urlMeta.LastModified); + std::tie(puller, etag, lastModified) = downloader->Download(url, token, urlMeta.ETag, urlMeta.LastModified); if (!puller) { Y_ENSURE(oldContentLink); // should not fire return oldContentLink; @@ -305,16 +260,71 @@ private: TStorage Storage; const TFileStorageConfig Config; std::vector<NFS::IDownloaderPtr> Downloaders; - TUrlMapper Mapper; + TMultiResourceLock MultiResourceLock; +}; + +class TFileStorageWithAsync: public TFileStorageDecorator { +public: + TFileStorageWithAsync(TFileStoragePtr fs) + : TFileStorageDecorator(std::move(fs)) + , QueueStarted(0) + { + auto numThreads = Inner_->GetConfig().GetThreads(); + if (1 == numThreads) { + MtpQueue.Reset(new TFakeThreadPool()); + } else { + MtpQueue.Reset(new TSimpleThreadPool(TThreadPoolParams{"FileStorage"})); + } + + // do not call MtpQueue->Start here as we have to do it _after_ fork + } + + ~TFileStorageWithAsync() { + MtpQueue->Stop(); + } + + NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName) final { + return DoAsync([file, outFileName](const TFileStoragePtr& fs) { + return fs->PutFile(file, outFileName); + }); + } + NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) final { + return DoAsync([data](const TFileStoragePtr& fs) { + return fs->PutInline(data); + }); + } + NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& token) final { + return DoAsync([url, token](const TFileStoragePtr& fs) { + return fs->PutUrl(url, token); + }); + } + +private: + NThreading::TFuture<TFileLinkPtr> DoAsync(std::function<TFileLinkPtr(const TFileStoragePtr&)> action) { + if (AtomicTryLock(&QueueStarted)) { + MtpQueue->Start(Inner_->GetConfig().GetThreads()); + } + auto logCtx = NLog::CurrentLogContextPath(); + return NThreading::Async([logCtx, fs = Inner_, action]() { + YQL_LOG_CTX_ROOT_SCOPE(logCtx); + return action(fs); + }, *MtpQueue); + } + +private: TAtomic QueueStarted; THolder<IThreadPool> MtpQueue; - TMultiResourceLock MultiResourceLock; }; -TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params) { + +TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders) { Y_ENSURE(0 != params.GetMaxFiles(), "FileStorage: MaxFiles must be greater than 0"); Y_ENSURE(0 != params.GetMaxSizeMb(), "FileStorage: MaxSizeMb must be greater than 0"); - return new TFileStorageImpl(params); + return new TFileStorageImpl(params, downloaders); +} + +TFileStoragePtr WithAsync(TFileStoragePtr fs) { + return new TFileStorageWithAsync(std::move(fs)); } void LoadFsConfigFromFile(TStringBuf path, TFileStorageConfig& params) { diff --git a/ydb/library/yql/core/file_storage/file_storage.h b/ydb/library/yql/core/file_storage/file_storage.h index ab5fd5c03e8..b95ccabaaff 100644 --- a/ydb/library/yql/core/file_storage/file_storage.h +++ b/ydb/library/yql/core/file_storage/file_storage.h @@ -11,7 +11,7 @@ #include <util/generic/ptr.h> #include <util/generic/string.h> -#include <tuple> +#include <vector> namespace NYql { @@ -19,15 +19,14 @@ class TFileStorageConfig; struct IFileStorage: public TThrRefBase { virtual ~IFileStorage() = default; - virtual void AddDownloader(NYql::NFS::IDownloaderPtr downloader) = 0; virtual TFileLinkPtr PutFile(const TString& file, const TString& outFileName = {}) = 0; virtual TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5 = {}) = 0; virtual TFileLinkPtr PutInline(const TString& data) = 0; - virtual TFileLinkPtr PutUrl(const TString& url, const TString& oauthToken) = 0; + virtual TFileLinkPtr PutUrl(const TString& url, const TString& token) = 0; // async versions virtual NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName = {}) = 0; virtual NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) = 0; - virtual NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& oauthToken) = 0; + virtual NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& token) = 0; virtual TFsPath GetRoot() const = 0; virtual TFsPath GetTemp() const = 0; @@ -37,7 +36,13 @@ struct IFileStorage: public TThrRefBase { using TFileStoragePtr = TIntrusivePtr<IFileStorage>; // Will use auto-cleaned temporary directory if storagePath is empty -TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params); +TFileStoragePtr CreateFileStorage(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders = {}); + +TFileStoragePtr WithAsync(TFileStoragePtr fs); + +inline TFileStoragePtr CreateAsyncFileStorage(const TFileStorageConfig& params, const std::vector<NFS::IDownloaderPtr>& downloaders = {}) { + return WithAsync(CreateFileStorage(params, downloaders)); +} void LoadFsConfigFromFile(TStringBuf path, TFileStorageConfig& params); void LoadFsConfigFromResource(TStringBuf path, TFileStorageConfig& params); diff --git a/ydb/library/yql/core/file_storage/file_storage_decorator.cpp b/ydb/library/yql/core/file_storage/file_storage_decorator.cpp new file mode 100644 index 00000000000..ec9e5bfac88 --- /dev/null +++ b/ydb/library/yql/core/file_storage/file_storage_decorator.cpp @@ -0,0 +1,42 @@ +#include "file_storage_decorator.h" + + +namespace NYql { + +TFileStorageDecorator::TFileStorageDecorator(TFileStoragePtr fs) + : Inner_(std::move(fs)) +{ +} + +TFileLinkPtr TFileStorageDecorator::PutFile(const TString& file, const TString& outFileName) { + return Inner_->PutFile(file, outFileName); +} +TFileLinkPtr TFileStorageDecorator::PutFileStripped(const TString& file, const TString& originalMd5) { + return Inner_->PutFileStripped(file, originalMd5); +} +TFileLinkPtr TFileStorageDecorator::PutInline(const TString& data) { + return Inner_->PutInline(data); +} +TFileLinkPtr TFileStorageDecorator::PutUrl(const TString& url, const TString& token) { + return Inner_->PutUrl(url, token); +} +NThreading::TFuture<TFileLinkPtr> TFileStorageDecorator::PutFileAsync(const TString& file, const TString& outFileName) { + return Inner_->PutFileAsync(file, outFileName); +} +NThreading::TFuture<TFileLinkPtr> TFileStorageDecorator::PutInlineAsync(const TString& data) { + return Inner_->PutInlineAsync(data); +} +NThreading::TFuture<TFileLinkPtr> TFileStorageDecorator::PutUrlAsync(const TString& url, const TString& token) { + return Inner_->PutUrlAsync(url, token); +} +TFsPath TFileStorageDecorator::GetRoot() const { + return Inner_->GetRoot(); +} +TFsPath TFileStorageDecorator::GetTemp() const { + return Inner_->GetTemp(); +} +const TFileStorageConfig& TFileStorageDecorator::GetConfig() const { + return Inner_->GetConfig(); +} + +} // NYql diff --git a/ydb/library/yql/core/file_storage/file_storage_decorator.h b/ydb/library/yql/core/file_storage/file_storage_decorator.h new file mode 100644 index 00000000000..9d6f2b8f3e4 --- /dev/null +++ b/ydb/library/yql/core/file_storage/file_storage_decorator.h @@ -0,0 +1,26 @@ +#pragma once + +#include "file_storage.h" + +namespace NYql { + +class TFileStorageDecorator: public IFileStorage { +public: + TFileStorageDecorator(TFileStoragePtr fs); + + TFileLinkPtr PutFile(const TString& file, const TString& outFileName) override; + TFileLinkPtr PutFileStripped(const TString& file, const TString& originalMd5) override; + TFileLinkPtr PutInline(const TString& data) override; + TFileLinkPtr PutUrl(const TString& url, const TString& token) override; + NThreading::TFuture<TFileLinkPtr> PutFileAsync(const TString& file, const TString& outFileName) override; + NThreading::TFuture<TFileLinkPtr> PutInlineAsync(const TString& data) override; + NThreading::TFuture<TFileLinkPtr> PutUrlAsync(const TString& url, const TString& token) override; + TFsPath GetRoot() const override; + TFsPath GetTemp() const override; + const TFileStorageConfig& GetConfig() const override; + +protected: + TFileStoragePtr Inner_; +}; + +} // NYql diff --git a/ydb/library/yql/core/file_storage/file_storage_ut.cpp b/ydb/library/yql/core/file_storage/file_storage_ut.cpp index 9e012489141..d5a5d7da8c8 100644 --- a/ydb/library/yql/core/file_storage/file_storage_ut.cpp +++ b/ydb/library/yql/core/file_storage/file_storage_ut.cpp @@ -1,6 +1,6 @@ #include "file_storage.h" -#include <ydb/library/yql/core/file_storage/ut/test_http_server.h> +#include <ydb/library/yql/utils/test_http_server/test_http_server.h> #include <ydb/library/yql/core/file_storage/proto/file_storage.pb.h> #include <ydb/library/yql/core/file_storage/http_download/http_download.h> #include <ydb/library/yql/core/file_storage/http_download/proto/http_download.pb.h> @@ -25,16 +25,14 @@ Y_UNIT_TEST_SUITE(TFileStorageTests) { return TIFStream(path).ReadAll(); } - static TFileStoragePtr CreateTestFS(TFileStorageConfig params = {}, const THttpDownloaderConfig* httpCfg = nullptr, const std::vector<TString>& extraPatterns = {}) { + static TFileStoragePtr CreateTestFS(TFileStorageConfig params = {}, const THttpDownloaderConfig* httpCfg = nullptr) { if (httpCfg) { TStringStream strCfg; SerializeToTextFormat(*httpCfg, strCfg); (*params.MutableDownloaderConfig())["http"] = strCfg.Str(); } - TFileStoragePtr fs = CreateFileStorage(params); - fs->AddDownloader(MakeHttpDownloader(false, params, extraPatterns)); - return fs; + return CreateFileStorage(params); } static std::unique_ptr<TTestHttpServer> CreateTestHttpServer() { @@ -475,51 +473,6 @@ Y_UNIT_TEST_SUITE(TFileStorageTests) { UNIT_ASSERT_VALUES_EQUAL(3, downloadCount); } - Y_UNIT_TEST(AllowedUrls) { - auto server = CreateTestHttpServer(); - - TString currentContent = "ABC"; - server->SetRequestHandler([&](auto&) { - return TTestHttpServer::TReply::Ok(currentContent); - }); - - auto url = server->GetUrl(); - - { - // not in whitelist - THttpDownloaderConfig httpCfg; - httpCfg.AddAllowedUrlPatterns("^XXXX$"); - TFileStorageConfig params; - TFileStoragePtr fs = CreateTestFS(params, &httpCfg); - - UNIT_ASSERT_EXCEPTION_CONTAINS(fs->PutUrl(url, {}), std::exception, "It is not allowed to download url http://localhost:"); - } - - { - // have in whitelist - THttpDownloaderConfig httpCfg; - httpCfg.SetSocketTimeoutMs(4000); - httpCfg.AddAllowedUrlPatterns("^http://localhost:"); - TFileStorageConfig params; - TFileStoragePtr fs = CreateTestFS(params, &httpCfg); - - auto link = fs->PutUrl(url, {}); - UNIT_ASSERT_VALUES_EQUAL(currentContent, ReadFileContent(link->GetPath())); - } - - { - // have eaxtra url in whitelist - THttpDownloaderConfig httpCfg; - httpCfg.SetSocketTimeoutMs(4000); - httpCfg.AddAllowedUrlPatterns("^XXXX$"); - TFileStorageConfig params; - TFileStoragePtr fs = CreateTestFS(params, &httpCfg, std::vector{TString{"^http://localhost:"}}); - - auto link = fs->PutUrl(url, {}); - UNIT_ASSERT_VALUES_EQUAL(currentContent, ReadFileContent(link->GetPath())); - } - } - Y_UNIT_TEST(SocketTimeout) { auto server = CreateTestHttpServer(); diff --git a/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt b/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt index eeb6a664522..73addbbd721 100644 --- a/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt +++ b/ydb/library/yql/core/file_storage/http_download/CMakeLists.txt @@ -19,11 +19,9 @@ target_link_libraries(core-file_storage-http_download PUBLIC yql-utils-fetch yql-utils-log library-yql-utils - cpp-regex-pcre cpp-digest-md5 cpp-http-misc ) target_sources(core-file_storage-http_download PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/http_download/http_download.cpp - ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp ) diff --git a/ydb/library/yql/core/file_storage/http_download/http_download.cpp b/ydb/library/yql/core/file_storage/http_download/http_download.cpp index 597103c19cb..c7654554125 100644 --- a/ydb/library/yql/core/file_storage/http_download/http_download.cpp +++ b/ydb/library/yql/core/file_storage/http_download/http_download.cpp @@ -1,5 +1,4 @@ #include "http_download.h" -#include "pattern_group.h" #include <ydb/library/yql/core/file_storage/proto/file_storage.pb.h> #include <ydb/library/yql/core/file_storage/http_download/proto/http_download.pb.h> @@ -9,7 +8,6 @@ #include <ydb/library/yql/utils/fetch/fetch.h> #include <ydb/library/yql/utils/log/log.h> #include <ydb/library/yql/utils/log/context.h> -#include <ydb/library/yql/utils/multi_resource_lock.h> #include <ydb/library/yql/utils/md5_stream.h> #include <ydb/library/yql/utils/retry.h> #include <ydb/library/yql/utils/yql_panic.h> @@ -27,22 +25,13 @@ namespace NYql { class THttpDownloader: public TDownloadConfig<THttpDownloader, THttpDownloaderConfig>, public NYql::NFS::IDownloader { public: - THttpDownloader(bool restictedUser, const TFileStorageConfig& config, const std::vector<TString>& extraAllowedUrls) - : RestictedUser(restictedUser) - { + THttpDownloader(const TFileStorageConfig& config) { Configure(config, "http"); - for (auto p: extraAllowedUrls) { - AllowedUrls.Add(p); - } } ~THttpDownloader() = default; void DoConfigure(const THttpDownloaderConfig& cfg) { SocketTimeoutMs = cfg.GetSocketTimeoutMs(); - - for (const auto& p : RestictedUser ? cfg.GetExternalAllowedUrlPatterns() : cfg.GetAllowedUrlPatterns()) { - AllowedUrls.Add(p); - } } bool Accept(const THttpURL& url) final { @@ -56,15 +45,8 @@ public: return false; } - std::tuple<NYql::NFS::TDataProvider, TString, TString> Download(const THttpURL& url, const TString& oauthToken, const TString& oldEtag, const TString& oldLastModified) final { - if (!AllowedUrls.IsEmpty()) { - if (auto strUrl = url.PrintS(); !AllowedUrls.Match(strUrl)) { - YQL_LOG(WARN) << "FileStorage: url " << strUrl << " is not in whitelist, reject downloading"; - throw yexception() << "It is not allowed to download url " << strUrl; - } - } - - TFetchResultPtr fr1 = FetchWithETagAndLastModified(url, oauthToken, oldEtag, oldLastModified, SocketTimeoutMs); + std::tuple<NYql::NFS::TDataProvider, TString, TString> Download(const THttpURL& url, const TString& token, const TString& oldEtag, const TString& oldLastModified) final { + TFetchResultPtr fr1 = FetchWithETagAndLastModified(url, token, oldEtag, oldLastModified, SocketTimeoutMs); switch (fr1->GetRetCode()) { case HTTP_NOT_MODIFIED: return std::make_tuple(NYql::NFS::TDataProvider{}, TString{}, TString{}); @@ -84,11 +66,11 @@ public: } private: - static TFetchResultPtr FetchWithETagAndLastModified(const THttpURL& url, const TString& oauthToken, const TString& oldEtag, const TString& oldLastModified, ui32 socketTimeoutMs) { + static TFetchResultPtr FetchWithETagAndLastModified(const THttpURL& url, const TString& token, const TString& oldEtag, const TString& oldLastModified, ui32 socketTimeoutMs) { // more details about ETag and ModifiedSince: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26 THttpHeaders headers; - if (!oauthToken.empty()) { - headers.AddHeader(THttpInputHeader("Authorization", "OAuth " + oauthToken)); + if (!token.empty()) { + headers.AddHeader(THttpInputHeader("Authorization", "OAuth " + token)); } // ETag has priority over modification time @@ -158,13 +140,11 @@ private: } private: - const bool RestictedUser; ui32 SocketTimeoutMs = 300000; - TPatternGroup AllowedUrls; }; -NYql::NFS::IDownloaderPtr MakeHttpDownloader(bool restictedUser, const TFileStorageConfig& config, const std::vector<TString>& extraAllowedUrls) { - return MakeIntrusive<THttpDownloader>(restictedUser, config, extraAllowedUrls); +NYql::NFS::IDownloaderPtr MakeHttpDownloader(const TFileStorageConfig& config) { + return MakeIntrusive<THttpDownloader>(config); } } // NYql diff --git a/ydb/library/yql/core/file_storage/http_download/http_download.h b/ydb/library/yql/core/file_storage/http_download/http_download.h index af39d7976a8..ee0ff964d04 100644 --- a/ydb/library/yql/core/file_storage/http_download/http_download.h +++ b/ydb/library/yql/core/file_storage/http_download/http_download.h @@ -8,6 +8,6 @@ namespace NYql { class TFileStorageConfig; -NYql::NFS::IDownloaderPtr MakeHttpDownloader(bool restictedUser, const TFileStorageConfig& config, const std::vector<TString>& extraAllowedUrls); +NYql::NFS::IDownloaderPtr MakeHttpDownloader(const TFileStorageConfig& config); } // NYql diff --git a/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp b/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp deleted file mode 100644 index 689c97d322a..00000000000 --- a/ydb/library/yql/core/file_storage/http_download/pattern_group.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "pattern_group.h" - -namespace NYql { - -TPatternGroup::TPatternGroup(const TVector<TString>& patterns) { - for (auto& p : patterns) { - Add(p); - } -} - -void TPatternGroup::Add(const TString& pattern) { - auto it = CompiledPatterns.find(pattern); - if (it != CompiledPatterns.end()) { - return; - } - - CompiledPatterns.emplace(pattern, TRegExMatch(pattern)); -} - -bool TPatternGroup::IsEmpty() const { - return CompiledPatterns.empty(); -} - -bool TPatternGroup::Match(const TString& s) const { - for (auto& p : CompiledPatterns) { - if (p.second.Match(s.c_str())) { - return true; - } - } - - return false; -} - -} diff --git a/ydb/library/yql/core/file_storage/http_download/pattern_group.h b/ydb/library/yql/core/file_storage/http_download/pattern_group.h deleted file mode 100644 index 53d0d64a7fa..00000000000 --- a/ydb/library/yql/core/file_storage/http_download/pattern_group.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include <library/cpp/regex/pcre/regexp.h> -#include <util/generic/map.h> -#include <util/generic/vector.h> - -namespace NYql { - -class TPatternGroup { -public: - explicit TPatternGroup(const TVector<TString>& patterns = {}); - void Add(const TString& pattern); - bool IsEmpty() const; - bool Match(const TString& s) const; - -private: - TMap<TString, TRegExMatch> CompiledPatterns; -}; - -} diff --git a/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto b/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto index 49e2214f1dc..ead8a463e69 100644 --- a/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto +++ b/ydb/library/yql/core/file_storage/http_download/proto/http_download.proto @@ -1,7 +1,9 @@ package NYql; message THttpDownloaderConfig { + // TODO: remove repeated string AllowedUrlPatterns = 1; // Whitelist of url regexps; disabled if empty + // TODO: remove repeated string ExternalAllowedUrlPatterns = 2; // Whitelist of url regexps for external users; disabled if empty optional uint32 SocketTimeoutMs = 3 [default = 300000]; } diff --git a/ydb/library/yql/core/file_storage/proto/file_storage.proto b/ydb/library/yql/core/file_storage/proto/file_storage.proto index 8a4e42decf6..c76b0ce01ec 100644 --- a/ydb/library/yql/core/file_storage/proto/file_storage.proto +++ b/ydb/library/yql/core/file_storage/proto/file_storage.proto @@ -1,6 +1,6 @@ package NYql; -message TSchemeTranslate { +message TSchemeTranslate { // TODO: remove required string Pattern = 1; // regexp to match required string TargetUrl = 2; // replacement string for target URL } @@ -9,7 +9,7 @@ message TFileStorageConfig { optional string Path = 1; // Path to file storage. An auto-cleaned temp directory is used for empty value optional uint32 MaxFiles = 2 [default = 1000]; // Maximum number of files in the storage optional uint32 MaxSizeMb = 3 [default = 100]; // Maximum total size of all files in the storage - repeated TSchemeTranslate CustomSchemes = 4; + repeated TSchemeTranslate CustomSchemes = 4; // TODO: remove optional uint32 Threads = 5 [default = 1]; // Number of download threads for async downloading optional uint32 RetryCount = 6 [default = 0]; // Number of additional attempts to download file optional uint32 RetryDelayMs = 7 [default = 1000]; // Delay in ms between attempts to download file diff --git a/ydb/library/yql/core/file_storage/url_mapper.cpp b/ydb/library/yql/core/file_storage/url_mapper.cpp deleted file mode 100644 index 0c646ed0d68..00000000000 --- a/ydb/library/yql/core/file_storage/url_mapper.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "url_mapper.h" - -namespace NYql { - -void TUrlMapper::AddMapping(const TString& pattern, const TString& targetUrl) { - CustomSchemes.push_back(TCustomScheme(pattern, targetUrl)); -} - -bool TUrlMapper::MapUrl(const TString& url, TString& mappedUrl) const { - for (const auto& sc : CustomSchemes) { - if (sc.Pattern.Match(url.data())) { - mappedUrl = TRegExSubst(sc.TargetUrlSubst).Replace(url.data()); - return true; - } - } - return false; -} - -} diff --git a/ydb/library/yql/core/file_storage/url_mapper.h b/ydb/library/yql/core/file_storage/url_mapper.h deleted file mode 100644 index f6438ecca3a..00000000000 --- a/ydb/library/yql/core/file_storage/url_mapper.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include <library/cpp/regex/pcre/regexp.h> -#include <util/generic/vector.h> - -namespace NYql { - -class TUrlMapper { -public: - void AddMapping(const TString& pattern, const TString& targetUrl); - bool MapUrl(const TString& url, TString& mappedUrl) const; - -private: - struct TCustomScheme { - TCustomScheme(const TString& pattern, const TString& url) - : Pattern(pattern) - , TargetUrlHolder(url) - , TargetUrlSubst(pattern.data()) { - if (0 == TargetUrlSubst.ParseReplacement(TargetUrlHolder.data())) { - ythrow yexception() << "Bad url replacement: " << TargetUrlHolder; - } - } - TRegExMatch Pattern; - TString TargetUrlHolder; - TRegExSubst TargetUrlSubst; - }; - -private: - TVector<TCustomScheme> CustomSchemes; -}; - -} diff --git a/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt b/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt index f949b8fc405..18cab17ac4c 100644 --- a/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt +++ b/ydb/library/yql/core/file_storage/ut/CMakeLists.darwin.txt @@ -19,8 +19,8 @@ target_link_libraries(ydb-library-yql-core-file_storage-ut PUBLIC yql-core-file_storage cpp-http-server cpp-threading-future - core-file_storage-http_download cpp-deprecated-atomic + yql-utils-test_http_server ) target_link_options(ydb-library-yql-core-file_storage-ut PRIVATE -Wl,-no_deduplicate @@ -32,7 +32,6 @@ target_sources(ydb-library-yql-core-file_storage-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/sized_cache_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/storage_ut.cpp - ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/ut/test_http_server.cpp ) add_test( NAME diff --git a/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt b/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt index 3c6b4caa57a..fcf6e42a526 100644 --- a/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt +++ b/ydb/library/yql/core/file_storage/ut/CMakeLists.linux.txt @@ -21,8 +21,8 @@ target_link_libraries(ydb-library-yql-core-file_storage-ut PUBLIC yql-core-file_storage cpp-http-server cpp-threading-future - core-file_storage-http_download cpp-deprecated-atomic + yql-utils-test_http_server ) target_link_options(ydb-library-yql-core-file_storage-ut PRIVATE -ldl @@ -38,7 +38,6 @@ target_sources(ydb-library-yql-core-file_storage-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/file_storage_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/sized_cache_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/storage_ut.cpp - ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/file_storage/ut/test_http_server.cpp ) add_test( NAME diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto index 63ed448a0a1..b43ecb0b91e 100644 --- a/ydb/library/yql/providers/common/proto/gateways_config.proto +++ b/ydb/library/yql/providers/common/proto/gateways_config.proto @@ -394,7 +394,21 @@ message TSolomonGatewayConfig { } message TFileStorageAdditionalConfig { - repeated string AllowedUrlPatterns = 1; + message TUrlPattern { + required string Pattern = 1; + optional string Alias = 2; + } + + message TSchemeTranslate { + required string Pattern = 1; // regexp to match + required string TargetUrl = 2; // replacement string for target URL + } + + repeated string AllowedUrlPatterns = 1; // Depricated + + repeated TUrlPattern AllowedUrls = 2; // Whitelist of url regexps; disabled if empty + repeated TUrlPattern ExternalAllowedUrls = 3; // Whitelist of url regexps for external users; disabled if empty + repeated TSchemeTranslate CustomSchemes = 4; } /////////////////////////////// Postgresql ///////////////////////////// diff --git a/ydb/library/yql/utils/CMakeLists.txt b/ydb/library/yql/utils/CMakeLists.txt index 39346521619..385bf233ac2 100644 --- a/ydb/library/yql/utils/CMakeLists.txt +++ b/ydb/library/yql/utils/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(backtrace) add_subdirectory(failure_injector) add_subdirectory(fetch) add_subdirectory(log) +add_subdirectory(test_http_server) add_subdirectory(threading) add_subdirectory(ut) diff --git a/ydb/library/yql/utils/test_http_server/CMakeLists.txt b/ydb/library/yql/utils/test_http_server/CMakeLists.txt new file mode 100644 index 00000000000..6bec57c9ad9 --- /dev/null +++ b/ydb/library/yql/utils/test_http_server/CMakeLists.txt @@ -0,0 +1,19 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(yql-utils-test_http_server) +target_link_libraries(yql-utils-test_http_server PUBLIC + contrib-libs-cxxsupp + yutil + cpp-http-server + cpp-http-misc +) +target_sources(yql-utils-test_http_server PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/test_http_server/test_http_server.cpp +) diff --git a/ydb/library/yql/core/file_storage/ut/test_http_server.cpp b/ydb/library/yql/utils/test_http_server/test_http_server.cpp index cf5b2536043..5d55230c410 100644 --- a/ydb/library/yql/core/file_storage/ut/test_http_server.cpp +++ b/ydb/library/yql/utils/test_http_server/test_http_server.cpp @@ -2,9 +2,8 @@ #include <library/cpp/http/misc/httpcodes.h> #include <library/cpp/http/server/http_ex.h> -#include <library/cpp/testing/unittest/registar.h> -#include <library/cpp/deprecated/atomic/atomic_ops.h> +#include <util/generic/yexception.h> namespace NYql { @@ -103,7 +102,7 @@ public: } void Start() { - UNIT_ASSERT(HttpServer_.Start()); + Y_ENSURE(HttpServer_.Start()); } void Stop() { diff --git a/ydb/library/yql/core/file_storage/ut/test_http_server.h b/ydb/library/yql/utils/test_http_server/test_http_server.h index 170b45ca62c..385cfaf9706 100644 --- a/ydb/library/yql/core/file_storage/ut/test_http_server.h +++ b/ydb/library/yql/utils/test_http_server/test_http_server.h @@ -1,11 +1,11 @@ #pragma once +#include <library/cpp/http/misc/httpcodes.h> + #include <util/generic/maybe.h> #include <util/generic/ptr.h> #include <util/generic/string.h> -#include <library/cpp/http/misc/httpcodes.h> -#include <library/cpp/threading/future/future.h> #include <functional> namespace NYql { |