summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordinmukhammed <[email protected]>2022-03-04 11:13:51 +0300
committerdinmukhammed <[email protected]>2022-03-04 11:13:51 +0300
commitb45021d3597dc78ff3b6939d65e4365634b88f78 (patch)
tree740aa383ab92a65245b35a68347cae4db31c8025
parentaa7e838c4b7bbdd8d9a1c82046e7081ebd1cbe2e (diff)
YQ-937 Validating type formats
Validate formats ref:a48dba1a222639cbf03699ec343fb021a1ee94df
-rw-r--r--ydb/library/yql/providers/common/provider/yql_provider.cpp41
-rw-r--r--ydb/library/yql/providers/common/provider/yql_provider.h2
-rw-r--r--ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp4
-rw-r--r--ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp23
4 files changed, 55 insertions, 15 deletions
diff --git a/ydb/library/yql/providers/common/provider/yql_provider.cpp b/ydb/library/yql/providers/common/provider/yql_provider.cpp
index 4d2ee60c379..cca0d60ed3a 100644
--- a/ydb/library/yql/providers/common/provider/yql_provider.cpp
+++ b/ydb/library/yql/providers/common/provider/yql_provider.cpp
@@ -13,6 +13,7 @@
#include <util/folder/path.h>
#include <util/generic/is_in.h>
#include <util/generic/utility.h>
+#include <util/string/join.h>
namespace NYql {
@@ -20,6 +21,25 @@ namespace NCommon {
using namespace NNodes;
+namespace { // file-local lookup tables consulted by ValidateFormat / ValidateCompression below
+ std::array<std::string_view, 6> Formats = { // format names accepted by ValidateFormat; NOTE(review): not declared const/constexpr although nothing visible here mutates it — consider making it immutable
+ "csv_with_names"sv,
+ "tsv_with_names"sv,
+ "json_list"sv,
+ "json"sv,
+ "raw"sv,
+ "json_each_row"sv
+ };
+ std::array<std::string_view, 6> Compressions = { // compression names accepted by ValidateCompression (same six values the removed inline IsIn list had); NOTE(review): also a mutable global
+ "gzip"sv,
+ "zstd"sv,
+ "lz4"sv,
+ "brotli"sv,
+ "bzip2"sv,
+ "xz"sv
+ };
+} // namespace
+
bool TCommitSettings::EnsureModeEmpty(TExprContext& ctx) {
if (Mode) {
ctx.AddError(TIssue(ctx.GetPosition(Pos), TStringBuilder()
@@ -1050,23 +1070,22 @@ void WriteStatistics(NYson::TYsonWriter& writer, bool totalOnly, const THashMap<
}
bool ValidateCompression(TStringBuf compression, TExprContext& ctx) {
- if (compression.empty() ||
- IsIn({
- "gzip"sv,
- "zstd"sv,
- "lz4"sv,
- "brotli"sv,
- "bzip2"sv,
- "xz"sv
- }, compression))
- {
+ if (compression.empty() || IsIn(Compressions, compression)) { // an empty value is accepted as-is; otherwise the name must appear in the shared Compressions table
return true;
}
ctx.AddError(TIssue(TStringBuilder() << "Unknown compression: " << compression
- << ". Use one of: gzip, zstd, lz4, brotli, bzip2, xz"));
+ << ". Use one of: " << JoinSeq(", ", Compressions))); // hint is built from the table, so the message and the check cannot drift apart
return false;
}
+bool ValidateFormat(TStringBuf format, TExprContext& ctx) { // returns true when format is empty or listed in Formats; otherwise records an issue in ctx and returns false
+ if (format.empty() || IsIn(Formats, format)) { // an empty value is accepted without further checks
+ return true;
+ }
+ ctx.AddError(TIssue(TStringBuilder() << "Unknown format: " << format // the issue is created without a position argument, same as in ValidateCompression
+ << ". Use one of: " << JoinSeq(", ", Formats))); // lists every accepted format name, comma-separated
+ return false;
+}
} // namespace NCommon
} // namespace NYql
diff --git a/ydb/library/yql/providers/common/provider/yql_provider.h b/ydb/library/yql/providers/common/provider/yql_provider.h
index 7163fe247ca..a09f759a766 100644
--- a/ydb/library/yql/providers/common/provider/yql_provider.h
+++ b/ydb/library/yql/providers/common/provider/yql_provider.h
@@ -122,5 +122,7 @@ void WriteStatistics(NYson::TYsonWriter& writer, bool totalOnly, const THashMap<
bool ValidateCompression(TStringBuf compression, TExprContext& ctx);
+bool ValidateFormat(TStringBuf format, TExprContext& ctx);
+
} // namespace NCommon
} // namespace NYql
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp
index cc0c8f42420..e2d26fd84cc 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp
@@ -85,6 +85,10 @@ public:
return TStatus::Error;
}
+ if (!NCommon::ValidateFormat(read.Format().Ref().Content(), ctx)) {
+ return TStatus::Error;
+ }
+
if (!NCommon::ValidateCompression(read.Compression().Ref().Content(), ctx)) {
return TStatus::Error;
}
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
index f0597bad330..b102c7ab046 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
@@ -15,14 +15,21 @@ using namespace NNodes;
namespace {
-TStringBuf GetCompression(const TExprNode& settings) {
+std::pair<TStringBuf, TStringBuf> GetFormatAndCompression(const TExprNode& settings) { // walks the children of settings and returns the pair (format, compression); either element stays empty when no non-empty value is found
+ TStringBuf compression; // default-constructed; only overwritten with a non-empty value
+ TStringBuf format; // default-constructed; only overwritten with a non-empty value
for (auto i = 0U; i < settings.ChildrenSize(); ++i) {
const auto& child = *settings.Child(i);
if (child.Head().IsAtom("compression") && child.Tail().IsCallable({"String", "Utf8"}))
if (const auto& comp = child.Tail().Head().Content(); !comp.empty())
- return comp;
+ compression = comp; // no early return any more: scanning continues so that "format" can still be picked up
+ if (child.Head().IsAtom("format")) // NOTE(review): unlike the "compression" branch, there is no IsCallable({"String", "Utf8"}) check before Tail().Head() is read — confirm the shape of the "format" setting makes this safe
+ if (const auto& form = child.Tail().Head().Content(); !form.empty())
+ format = form;
+ if (compression && format) // both values found; remaining children are not inspected
+ break;
}
- return "";
+ return std::make_pair(format, compression); // order is (format, compression): .first is the format, .second is the compression
}
class TS3DataSourceTypeAnnotationTransformer : public TVisitorTransformerBase {
@@ -132,7 +139,15 @@ public:
return TStatus::Error;
}
- if (const auto& compression = GetCompression(*input->Child(TS3Object::idx_Settings)); !NCommon::ValidateCompression(compression, ctx)) {
+ const auto formatAndCompression = GetFormatAndCompression(*input->Child(TS3Object::idx_Settings)); // used for win32 build
+ const auto& format = formatAndCompression.first;
+ const auto& compression = formatAndCompression.second;
+
+ if (!NCommon::ValidateFormat(format, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!NCommon::ValidateCompression(compression, ctx)) {
return TStatus::Error;
}