aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhor911 <hor911@ydb.tech>2022-07-14 00:12:17 +0300
committerhor911 <hor911@ydb.tech>2022-07-14 00:12:17 +0300
commit097554f7aafcd63fd8ecd8581df8abe303f4941e (patch)
tree3a27e969228c3d339fdfa8a29052adfd7e5b15e5
parenta1a06afc6c893a7d5ec3c23320887fc2cabd324f (diff)
downloadydb-097554f7aafcd63fd8ecd8581df8abe303f4941e.tar.gz
Per format S3 file limits
-rw-r--r--ydb/library/yql/providers/common/proto/gateways_config.proto8
-rw-r--r--ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp24
-rw-r--r--ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp5
-rw-r--r--ydb/library/yql/providers/s3/provider/yql_s3_settings.h1
4 files changed, 37 insertions, 1 deletions
diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto
index a00d12b826..95c4a17cc1 100644
--- a/ydb/library/yql/providers/common/proto/gateways_config.proto
+++ b/ydb/library/yql/providers/common/proto/gateways_config.proto
@@ -337,10 +337,16 @@ message TS3ClusterConfig {
repeated TAttr Settings = 100;
}
+message TS3FormatSizeLimit {
+ optional string Name = 1; // Format name, e.g. csv_with_names; entries with an empty name are ignored
+ optional uint64 FileSizeLimit = 2; // Max allowed object size for this format; takes effect only when lower than the global FileSizeLimit
+}
+
message TS3GatewayConfig {
repeated TS3ClusterConfig ClusterMapping = 1;
- optional uint64 FileSizeLimit = 2;
+ optional uint64 FileSizeLimit = 2; // Global limit
+ repeated TS3FormatSizeLimit FormatSizeLimit = 6; // Format limits
optional uint64 MaxFilesPerQuery = 3;
optional uint64 MaxReadSizePerQuery = 4;
optional uint64 MaxDiscoveryFilesPerQuery = 5;
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
index 5822ec42a4..2abc4c4b16 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp
@@ -110,6 +110,23 @@ public:
YQL_ENSURE(object.IsCallable("MrTableConcat"));
size_t readSize = 0;
TExprNode::TListType pathNodes;
+
+ TString formatName;
+ {
+ const auto& settings = *read.Ref().Child(4);
+ auto format = GetSetting(settings, "format");
+ if (format && format->ChildrenSize() >= 2) {
+ formatName = format->Child(1)->Content();
+ }
+ }
+ auto fileSizeLimit = State_->Configuration->FileSizeLimit;
+ if (formatName) {
+ auto it = State_->Configuration->FormatSizeLimits.find(formatName);
+ if (it != State_->Configuration->FormatSizeLimits.end() && fileSizeLimit > it->second) {
+ fileSizeLimit = it->second;
+ }
+ }
+
for (auto& req : requests) {
auto it = pendingRequests.find(req);
YQL_ENSURE(it != pendingRequests.end());
@@ -134,6 +151,13 @@ public:
return TStatus::Error;
}
for (auto& entry : listEntries) {
+
+ if (entry.Size > fileSizeLimit) {
+ ctx.AddError(TIssue(ctx.GetPosition(object.Pos()),
+ TStringBuilder() << "Size of object " << entry.Path << " = " << entry.Size << " and exceeds limit = " << fileSizeLimit << " specified for format " << formatName));
+ return TStatus::Error;
+ }
+
TExprNodeList extraColumnsAsStructArgs;
if (auto confIt = GenColumnsByNode_.find(node); confIt != GenColumnsByNode_.end()) {
const TGeneratedColumnsConfig& config = confIt->second;
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp
index cde8d3834e..d7b4b0bfa0 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp
@@ -21,6 +21,11 @@ bool TS3Configuration::HasCluster(TStringBuf cluster) const {
void TS3Configuration::Init(const TS3GatewayConfig& config, TIntrusivePtr<TTypeAnnotationContext> typeCtx)
{
+ for (auto& formatSizeLimit: config.GetFormatSizeLimit()) {
+ if (formatSizeLimit.GetName()) { // ignore unnamed limits
+ FormatSizeLimits.emplace(formatSizeLimit.GetName(), formatSizeLimit.GetFileSizeLimit());
+ }
+ }
FileSizeLimit = config.HasFileSizeLimit() ? config.GetFileSizeLimit() : 2_GB;
MaxFilesPerQuery = config.HasMaxFilesPerQuery() ? config.GetMaxFilesPerQuery() : 7000;
MaxDiscoveryFilesPerQuery = config.HasMaxDiscoveryFilesPerQuery() ? config.GetMaxDiscoveryFilesPerQuery() : 9000;
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h
index ebac3f89d1..3c800540e6 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h
@@ -32,6 +32,7 @@ struct TS3Configuration : public TS3Settings, public NCommon::TSettingDispatcher
std::unordered_map<TString, TS3ClusterSettings> Clusters;
ui64 FileSizeLimit;
+ std::unordered_map<TString, ui64> FormatSizeLimits;
ui64 MaxFilesPerQuery;
ui64 MaxDiscoveryFilesPerQuery;
ui64 MaxReadSizePerQuery;