diff options
author | hor911 <hor911@ydb.tech> | 2022-07-14 00:12:17 +0300 |
---|---|---|
committer | hor911 <hor911@ydb.tech> | 2022-07-14 00:12:17 +0300 |
commit | 097554f7aafcd63fd8ecd8581df8abe303f4941e (patch) | |
tree | 3a27e969228c3d339fdfa8a29052adfd7e5b15e5 | |
parent | a1a06afc6c893a7d5ec3c23320887fc2cabd324f (diff) | |
download | ydb-097554f7aafcd63fd8ecd8581df8abe303f4941e.tar.gz |
Per-format S3 file size limits
4 files changed, 37 insertions, 1 deletion
diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto index a00d12b826..95c4a17cc1 100644 --- a/ydb/library/yql/providers/common/proto/gateways_config.proto +++ b/ydb/library/yql/providers/common/proto/gateways_config.proto @@ -337,10 +337,16 @@ message TS3ClusterConfig { repeated TAttr Settings = 100; } +message TS3FormatSizeLimit { + optional string Name = 1; // Format name i.e. csv_with_names + optional uint64 FileSizeLimit = 2; // Max allowed size for this format +} + message TS3GatewayConfig { repeated TS3ClusterConfig ClusterMapping = 1; - optional uint64 FileSizeLimit = 2; + optional uint64 FileSizeLimit = 2; // Global limit + repeated TS3FormatSizeLimit FormatSizeLimit = 6; // Format limits optional uint64 MaxFilesPerQuery = 3; optional uint64 MaxReadSizePerQuery = 4; optional uint64 MaxDiscoveryFilesPerQuery = 5; diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp index 5822ec42a4..2abc4c4b16 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_io_discovery.cpp @@ -110,6 +110,23 @@ public: YQL_ENSURE(object.IsCallable("MrTableConcat")); size_t readSize = 0; TExprNode::TListType pathNodes; + + TString formatName; + { + const auto& settings = *read.Ref().Child(4); + auto format = GetSetting(settings, "format"); + if (format && format->ChildrenSize() >= 2) { + formatName = format->Child(1)->Content(); + } + } + auto fileSizeLimit = State_->Configuration->FileSizeLimit; + if (formatName) { + auto it = State_->Configuration->FormatSizeLimits.find(formatName); + if (it != State_->Configuration->FormatSizeLimits.end() && fileSizeLimit > it->second) { + fileSizeLimit = it->second; + } + } + for (auto& req : requests) { auto it = pendingRequests.find(req); YQL_ENSURE(it != pendingRequests.end()); @@ -134,6 +151,13 @@ public: 
return TStatus::Error; } for (auto& entry : listEntries) { + + if (entry.Size > fileSizeLimit) { + ctx.AddError(TIssue(ctx.GetPosition(object.Pos()), + TStringBuilder() << "Size of object " << entry.Path << " = " << entry.Size << " and exceeds limit = " << fileSizeLimit << " specified for format " << formatName)); + return TStatus::Error; + } + TExprNodeList extraColumnsAsStructArgs; if (auto confIt = GenColumnsByNode_.find(node); confIt != GenColumnsByNode_.end()) { const TGeneratedColumnsConfig& config = confIt->second; diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp index cde8d3834e..d7b4b0bfa0 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.cpp @@ -21,6 +21,11 @@ bool TS3Configuration::HasCluster(TStringBuf cluster) const { void TS3Configuration::Init(const TS3GatewayConfig& config, TIntrusivePtr<TTypeAnnotationContext> typeCtx) { + for (auto& formatSizeLimit: config.GetFormatSizeLimit()) { + if (formatSizeLimit.GetName()) { // ignore unnamed limits + FormatSizeLimits.emplace(formatSizeLimit.GetName(), formatSizeLimit.GetFileSizeLimit()); + } + } FileSizeLimit = config.HasFileSizeLimit() ? config.GetFileSizeLimit() : 2_GB; MaxFilesPerQuery = config.HasMaxFilesPerQuery() ? config.GetMaxFilesPerQuery() : 7000; MaxDiscoveryFilesPerQuery = config.HasMaxDiscoveryFilesPerQuery() ? 
config.GetMaxDiscoveryFilesPerQuery() : 9000; diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h index ebac3f89d1..3c800540e6 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_settings.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_settings.h @@ -32,6 +32,7 @@ struct TS3Configuration : public TS3Settings, public NCommon::TSettingDispatcher std::unordered_map<TString, TS3ClusterSettings> Clusters; ui64 FileSizeLimit; + std::unordered_map<TString, ui64> FormatSizeLimits; ui64 MaxFilesPerQuery; ui64 MaxDiscoveryFilesPerQuery; ui64 MaxReadSizePerQuery; |