diff options
| | | |
|---|---|---|
author | Sergey Uzhakov <uzhastik@gmail.com> | 2022-06-09 11:51:22 +0300 |
committer | Sergey Uzhakov <uzhastik@gmail.com> | 2022-06-09 11:51:22 +0300 |
commit | f34e230eb02f287a6a731705de7e6112c5f35db8 (patch) | |
tree | f6e55c2f612c96f316e765887848912be03cf17b | |
parent | 71f1d3833dc656f57b03171d691f249134df7c85 (diff) | |
download | ydb-f34e230eb02f287a6a731705de7e6112c5f35db8.tar.gz |
YQ-1139: s3: replace 9K file count limit with cfg parameter
ref:ae5343cf60357a2b986af02653e16469a6a7660d
4 files changed, 13 insertions, 7 deletions
diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto index 2910c9c248..e9141644b1 100644 --- a/ydb/library/yql/providers/common/proto/gateways_config.proto +++ b/ydb/library/yql/providers/common/proto/gateways_config.proto @@ -328,8 +328,9 @@ message TS3GatewayConfig { repeated TS3ClusterConfig ClusterMapping = 1; optional uint64 FileSizeLimit = 2; - optional uint32 MaxFilesPerQuery = 3; + optional uint64 MaxFilesPerQuery = 3; optional uint64 MaxReadSizePerQuery = 4; + optional uint64 MaxDiscoveryFilesPerQuery = 5; repeated TAttr DefaultSettings = 100; } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp index 49df936a91..91079b22d2 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp @@ -45,7 +45,8 @@ void OnDiscovery( NThreading::TPromise<void> promise, std::weak_ptr<TPendingBuckets> pendingBucketsWPtr, int promiseInd, - const IRetryPolicy<long>::TPtr& retryPolicy) { + const IRetryPolicy<long>::TPtr& retryPolicy, + ui64 maxDiscoveryFilesPerQuery) { auto pendingBuckets = pendingBucketsWPtr.lock(); // keys and output could be used only when TPendingBuckets is alive if (!pendingBuckets) { return; @@ -67,8 +68,8 @@ void OnDiscovery( root.Node("s3:KeyCount", false, nss).Value<unsigned>() > 0U) { const auto& contents = root.XPath("s3:Contents", false, nss); auto& items = std::get<TItemsMap>(output); - if (items.size() + contents.size() > 9000ULL) { - std::get<TIssues>(output) = { TIssue(pos, TStringBuilder() << "It's over nine thousand items under '" << std::get<0U>(keys) << std::get<1U>(keys) << "'!")}; + if (maxDiscoveryFilesPerQuery && items.size() + contents.size() > maxDiscoveryFilesPerQuery) { + std::get<TIssues>(output) = { TIssue(pos, TStringBuilder() << "Over " << maxDiscoveryFilesPerQuery << " files 
discovered in '" << std::get<0U>(keys) << std::get<1U>(keys) << "'")}; break; } @@ -97,7 +98,7 @@ void OnDiscovery( url, std::move(headers), 0U, - std::bind(&OnDiscovery, gateway, pos, std::placeholders::_1, std::cref(keys), std::ref(output), std::move(promise), pendingBucketsWPtr, promiseInd, retryPolicy), + std::bind(&OnDiscovery, gateway, pos, std::placeholders::_1, std::cref(keys), std::ref(output), std::move(promise), pendingBucketsWPtr, promiseInd, retryPolicy, maxDiscoveryFilesPerQuery), /*data=*/"", retryPolicy); } @@ -121,6 +122,8 @@ void OnDiscovery( break; } + // this logging does not work at the moment since we are trying to do it in non-pipeline thread (http gateway thread) + // todo: fix logging YQL_CLOG(DEBUG, ProviderS3) << "Set promise with log message: " << logMsg; promise.SetValue(); } @@ -232,7 +235,7 @@ public: 0U, std::bind(&OnDiscovery, IHTTPGateway::TWeakPtr(Gateway_), ctx.GetPosition((*std::get<TNodeSet>(bucket.second).cbegin())->Pos()), std::placeholders::_1, - std::cref(bucket.first), std::ref(bucket.second), std::move(promise), pendingBucketsWPtr, i++, retryPolicy), + std::cref(bucket.first), std::ref(bucket.second), std::move(promise), pendingBucketsWPtr, i++, retryPolicy, State_->Configuration->MaxDiscoveryFilesPerQuery), /*data=*/"", retryPolicy ); diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp index 2707685a26..cde8d3834e 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp @@ -23,6 +23,7 @@ void TS3Configuration::Init(const TS3GatewayConfig& config, TIntrusivePtr<TTypeA { FileSizeLimit = config.HasFileSizeLimit() ? config.GetFileSizeLimit() : 2_GB; MaxFilesPerQuery = config.HasMaxFilesPerQuery() ? config.GetMaxFilesPerQuery() : 7000; + MaxDiscoveryFilesPerQuery = config.HasMaxDiscoveryFilesPerQuery() ? 
config.GetMaxDiscoveryFilesPerQuery() : 9000; MaxReadSizePerQuery = config.HasMaxReadSizePerQuery() ? config.GetMaxReadSizePerQuery() : 4_GB; TVector<TString> clusters(Reserve(config.ClusterMappingSize())); diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h index 1d3a8b985a..ebac3f89d1 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h @@ -32,7 +32,8 @@ struct TS3Configuration : public TS3Settings, public NCommon::TSettingDispatcher std::unordered_map<TString, TS3ClusterSettings> Clusters; ui64 FileSizeLimit; - ui32 MaxFilesPerQuery; + ui64 MaxFilesPerQuery; + ui64 MaxDiscoveryFilesPerQuery; ui64 MaxReadSizePerQuery; }; |