summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1
diff options
context:
space:
mode:
author: grigoriypisar <[email protected]> 2025-09-04 12:04:05 +0300
committer: grigoriypisar <[email protected]> 2025-09-04 12:26:12 +0300
commit: c029afad9f05609faea295c8ba76996f9a07fbef (patch)
tree: 74bd8c20f7089ff6aa5a8a9535d58bd16c422450 /yql/essentials/sql/v1
parent: 89ffb9c4ebdd8a2eedfbf7a2f4778dfb6ef50161 (diff)
fixed parsing for BEGIN / END in streaming queries
Добавлена возможность отключения отложенного применения '\n' и '\r' в TextWalker, чтобы позиции, генерируемые им, были в точности равны позициям токенов от antlr4 лексера: <https://nda.ya.ru/t/hmKq_iWN7JVCGe> commit_hash:15049d23b9ac1232b9e1d281d86d6b51d5822f85
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- yql/essentials/sql/v1/sql_translation.cpp | 42
-rw-r--r-- yql/essentials/sql/v1/sql_translation.h | 2
-rw-r--r-- yql/essentials/sql/v1/sql_ut.cpp | 1
-rw-r--r-- yql/essentials/sql/v1/sql_ut_antlr4.cpp | 1
-rw-r--r-- yql/essentials/sql/v1/sql_ut_common.h | 118
5 files changed, 135 insertions, 29 deletions
diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp
index e3ff7464795..635146f77cf 100644
--- a/yql/essentials/sql/v1/sql_translation.cpp
+++ b/yql/essentials/sql/v1/sql_translation.cpp
@@ -5270,27 +5270,6 @@ bool TSqlTranslation::ParseViewQuery(
namespace {
-static std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4) {
- if (1 == token.GetLine() && 0 == token.GetColumn()) {
- return 0;
- }
-
- TPosition pos = {0, 1};
- TTextWalker walker(pos, antlr4);
-
- std::string::size_type position = 0;
- for (char c : query) {
- walker.Advance(c);
- ++position;
-
- if (pos.Row == token.GetLine() && pos.Column == token.GetColumn()) {
- return position;
- }
- }
-
- return std::string::npos;
-}
-
static TString GetLambdaText(TTranslation& ctx, TContext& Ctx, const TRule_lambda_or_parameter& lambdaOrParameter) {
static const TString statementSeparator = ";\n";
@@ -5343,6 +5322,27 @@ static TString GetLambdaText(TTranslation& ctx, TContext& Ctx, const TRule_lambd
}
}
+} // anonymous namespace
+
+std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4) {
+ if (1 == token.GetLine() && 0 == token.GetColumn()) {
+ return 0;
+ }
+
+ TPosition pos = {0, 1};
+ TTextWalker walker(pos, antlr4);
+
+ std::string::size_type position = 0;
+ for (char c : query) {
+ walker.Advance(c);
+ ++position;
+
+ if (pos.Row == token.GetLine() && pos.Column == token.GetColumn()) {
+ return position;
+ }
+ }
+
+ return std::string::npos;
}
bool TSqlTranslation::ParseTransferLambda(
diff --git a/yql/essentials/sql/v1/sql_translation.h b/yql/essentials/sql/v1/sql_translation.h
index c17a2d43a48..976846a2f3b 100644
--- a/yql/essentials/sql/v1/sql_translation.h
+++ b/yql/essentials/sql/v1/sql_translation.h
@@ -354,4 +354,6 @@ TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& patter
bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix);
+std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4);
+
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp
index 66c9a159fbe..c29c49ee59a 100644
--- a/yql/essentials/sql/v1/sql_ut.cpp
+++ b/yql/essentials/sql/v1/sql_ut.cpp
@@ -1,4 +1,5 @@
#include "sql_ut.h"
+#include "sql_translation.h"
#include "format/sql_format.h"
#include "lexer/lexer.h"
diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp
index 18480d048cd..bc29c949ddd 100644
--- a/yql/essentials/sql/v1/sql_ut_antlr4.cpp
+++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp
@@ -1,4 +1,5 @@
#include "sql_ut_antlr4.h"
+#include "sql_translation.h"
#include "format/sql_format.h"
#include "lexer/lexer.h"
diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h
index d961a33ea6c..b6ae6ead2ec 100644
--- a/yql/essentials/sql/v1/sql_ut_common.h
+++ b/yql/essentials/sql/v1/sql_ut_common.h
@@ -9393,18 +9393,51 @@ USE hahn;
UNIT_ASSERT_VALUES_EQUAL(1, elementStat["__query_text"]);
}
+ Y_UNIT_TEST(CreateStreamingQueryCrlfCheck) {
+ NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql(
+USE plato;
+-- Some comment
+CREATE STREAMING QUERY MyQuery AS DO )sql" << "\r" << R"sql(BEGIN
+USE plato;
+$source = SELECT * FROM Input;
+INSERT INTO Output1 SELECT * FROM $source;
+INSERT INTO Output2 SELECT * FROM $source;
+END DO;
+USE hahn;
+-- Other comment
+ )sql");
+ UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "createObject") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_ast" (block '()#");
+ }
+
+ if (word == "__query_text") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n")))#");
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("createObject"), 0}, {TString("__query_text"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["createObject"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["__query_text"]);
+ }
+
Y_UNIT_TEST(CreateStreamingQueryWithSettings) {
- NYql::TAstParseResult res = SqlToYql(R"sql(
+ NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql(
USE plato;
-- Some comment
CREATE STREAMING QUERY MyQuery WITH (
RUN = TRUE,
RESOURCE_POOL = my_pool
-) AS DO BEGIN
+) AS DO )sql" << "\r" << R"sql(BEGIN
USE plato;
$source = SELECT * FROM Input;
INSERT INTO Output1 SELECT * FROM $source;
-INSERT INTO Output2 SELECT * FROM $source;END DO;
+INSERT INTO Output2 SELECT * FROM $source;
+END DO;
USE hahn;
-- Other comment
)sql");
@@ -9416,7 +9449,7 @@ USE hahn;
}
if (word == "__query_text") {
- UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;") '('"resource_pool" '"my_pool") '('"run" (Bool '"true")))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n") '('"resource_pool" '"my_pool") '('"run" (Bool '"true")))#");
}
};
@@ -9548,14 +9581,15 @@ USE hahn;
}
Y_UNIT_TEST(AlterStreamingQuerySetQuery) {
- NYql::TAstParseResult res = SqlToYql(R"sql(
+ NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql(
USE plato;
-- Some comment
-ALTER STREAMING QUERY MyQuery AS DO BEGIN
+ALTER STREAMING QUERY MyQuery AS DO )sql" << "\r" << R"sql(BEGIN
USE plato;
$source = SELECT * FROM Input;
INSERT INTO Output1 SELECT * FROM $source;
-INSERT INTO Output2 SELECT * FROM $source;END DO;
+INSERT INTO Output2 SELECT * FROM $source;
+END DO;
USE hahn;
-- Other comment
)sql");
@@ -9567,7 +9601,7 @@ USE hahn;
}
if (word == "__query_text") {
- UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;")))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n")))#");
}
};
@@ -9756,3 +9790,71 @@ USE hahn;
UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
}
}
+
+Y_UNIT_TEST_SUITE(TestGetQueryPosition) {
+ Y_UNIT_TEST(TestTokenFinding) {
+ const TString query = TStringBuilder() << R"(
+)" << "\r" << R"(BEGIN)" << "\r\n" << R"(
+ )" << "\n\r" << R"(END
+$b = ()" << "\r\r" << R"($x) -> {
+
+)" << "\n" << R"(
+-- comment A
+return /*Комментарий*/ $x;
+-- Comment B
+};
+)";
+
+ NSQLTranslationV1::TLexers lexers;
+#if ANTLR_VER == 3
+ bool antlr4 = false;
+ lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory();
+#else
+ bool antlr4 = true;
+ lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+#endif
+
+ ui64 lexerPosition = 0;
+ const auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
+ NSQLv1Generated::TToken tokenProto;
+ tokenProto.SetLine(token.Line);
+ tokenProto.SetColumn(token.LinePos);
+ UNIT_ASSERT_VALUES_EQUAL_C(lexerPosition, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4), token.Line << ":" << token.LinePos << ":'" << token.Content << "'");
+
+ lexerPosition += token.Content.size();
+ };
+
+ const auto lexer = NSQLTranslationV1::MakeLexer(lexers, false, antlr4);
+
+ NYql::TIssues issues;
+ const bool result = lexer->Tokenize(query, {}, onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS);
+ UNIT_ASSERT_C(result, issues.ToOneLineString());
+ }
+
+ Y_UNIT_TEST(TestTokenMissing) {
+ const TString query = "BEGIN /*Комментарий*/ \nEND";
+ NSQLv1Generated::TToken tokenProto;
+
+#if ANTLR_VER == 3
+ bool antlr4 = false;
+#else
+ bool antlr4 = true;
+#endif
+
+ tokenProto.SetLine(3);
+ tokenProto.SetColumn(0);
+ UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4));
+
+ tokenProto.SetLine(2);
+ tokenProto.SetColumn(4);
+ UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4));
+
+ tokenProto.SetLine(1);
+ tokenProto.SetColumn(34);
+ UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4));
+
+ tokenProto.SetLine(1);
+ tokenProto.SetColumn(0);
+ UNIT_ASSERT_VALUES_EQUAL(0, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4));
+ }
+}