about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sergey Uzhakov <uzhastik@gmail.com> 2022-06-09 11:51:22 +0300
committer Sergey Uzhakov <uzhastik@gmail.com> 2022-06-09 11:51:22 +0300
commit f34e230eb02f287a6a731705de7e6112c5f35db8 (patch)
tree f6e55c2f612c96f316e765887848912be03cf17b
parent 71f1d3833dc656f57b03171d691f249134df7c85 (diff)
download ydb-f34e230eb02f287a6a731705de7e6112c5f35db8.tar.gz
YQ-1139: s3: replace 9K file count limit with cfg parameter
ref:ae5343cf60357a2b986af02653e16469a6a7660d
-rw-r--r-- ydb/library/yql/providers/common/proto/gateways_config.proto 3
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp 13
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp 1
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_settings.h 3
4 files changed, 13 insertions, 7 deletions
diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto
index 2910c9c248..e9141644b1 100644
--- a/ydb/library/yql/providers/common/proto/gateways_config.proto
+++ b/ydb/library/yql/providers/common/proto/gateways_config.proto
@@ -328,8 +328,9 @@ message TS3GatewayConfig {
repeated TS3ClusterConfig ClusterMapping = 1;
optional uint64 FileSizeLimit = 2;
- optional uint32 MaxFilesPerQuery = 3;
+ optional uint64 MaxFilesPerQuery = 3;
optional uint64 MaxReadSizePerQuery = 4;
+ optional uint64 MaxDiscoveryFilesPerQuery = 5;
repeated TAttr DefaultSettings = 100;
}
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
index 49df936a91..91079b22d2 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
@@ -45,7 +45,8 @@ void OnDiscovery(
NThreading::TPromise<void> promise,
std::weak_ptr<TPendingBuckets> pendingBucketsWPtr,
int promiseInd,
- const IRetryPolicy<long>::TPtr& retryPolicy) {
+ const IRetryPolicy<long>::TPtr& retryPolicy,
+ ui64 maxDiscoveryFilesPerQuery) {
auto pendingBuckets = pendingBucketsWPtr.lock(); // keys and output could be used only when TPendingBuckets is alive
if (!pendingBuckets) {
return;
@@ -67,8 +68,8 @@ void OnDiscovery(
root.Node("s3:KeyCount", false, nss).Value<unsigned>() > 0U) {
const auto& contents = root.XPath("s3:Contents", false, nss);
auto& items = std::get<TItemsMap>(output);
- if (items.size() + contents.size() > 9000ULL) {
- std::get<TIssues>(output) = { TIssue(pos, TStringBuilder() << "It's over nine thousand items under '" << std::get<0U>(keys) << std::get<1U>(keys) << "'!")};
+ if (maxDiscoveryFilesPerQuery && items.size() + contents.size() > maxDiscoveryFilesPerQuery) {
+ std::get<TIssues>(output) = { TIssue(pos, TStringBuilder() << "Over " << maxDiscoveryFilesPerQuery << " files discovered in '" << std::get<0U>(keys) << std::get<1U>(keys) << "'")};
break;
}
@@ -97,7 +98,7 @@ void OnDiscovery(
url,
std::move(headers),
0U,
- std::bind(&OnDiscovery, gateway, pos, std::placeholders::_1, std::cref(keys), std::ref(output), std::move(promise), pendingBucketsWPtr, promiseInd, retryPolicy),
+ std::bind(&OnDiscovery, gateway, pos, std::placeholders::_1, std::cref(keys), std::ref(output), std::move(promise), pendingBucketsWPtr, promiseInd, retryPolicy, maxDiscoveryFilesPerQuery),
/*data=*/"",
retryPolicy);
}
@@ -121,6 +122,8 @@ void OnDiscovery(
break;
}
+ // this logging does not work at the moment since we are trying to do it in non-pipeline thread (http gateway thread)
+ // todo: fix logging
YQL_CLOG(DEBUG, ProviderS3) << "Set promise with log message: " << logMsg;
promise.SetValue();
}
@@ -232,7 +235,7 @@ public:
0U,
std::bind(&OnDiscovery,
IHTTPGateway::TWeakPtr(Gateway_), ctx.GetPosition((*std::get<TNodeSet>(bucket.second).cbegin())->Pos()), std::placeholders::_1,
- std::cref(bucket.first), std::ref(bucket.second), std::move(promise), pendingBucketsWPtr, i++, retryPolicy),
+ std::cref(bucket.first), std::ref(bucket.second), std::move(promise), pendingBucketsWPtr, i++, retryPolicy, State_->Configuration->MaxDiscoveryFilesPerQuery),
/*data=*/"",
retryPolicy
);
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp
index 2707685a26..cde8d3834e 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp
@@ -23,6 +23,7 @@ void TS3Configuration::Init(const TS3GatewayConfig& config, TIntrusivePtr<TTypeA
{
FileSizeLimit = config.HasFileSizeLimit() ? config.GetFileSizeLimit() : 2_GB;
MaxFilesPerQuery = config.HasMaxFilesPerQuery() ? config.GetMaxFilesPerQuery() : 7000;
+ MaxDiscoveryFilesPerQuery = config.HasMaxDiscoveryFilesPerQuery() ? config.GetMaxDiscoveryFilesPerQuery() : 9000;
MaxReadSizePerQuery = config.HasMaxReadSizePerQuery() ? config.GetMaxReadSizePerQuery() : 4_GB;
TVector<TString> clusters(Reserve(config.ClusterMappingSize()));
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h
index 1d3a8b985a..ebac3f89d1 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h
@@ -32,7 +32,8 @@ struct TS3Configuration : public TS3Settings, public NCommon::TSettingDispatcher
std::unordered_map<TString, TS3ClusterSettings> Clusters;
ui64 FileSizeLimit;
- ui32 MaxFilesPerQuery;
+ ui64 MaxFilesPerQuery;
+ ui64 MaxDiscoveryFilesPerQuery;
ui64 MaxReadSizePerQuery;
};