diff options
| author | grigoriypisar <[email protected]> | 2025-09-04 12:04:05 +0300 |
|---|---|---|
| committer | grigoriypisar <[email protected]> | 2025-09-04 12:26:12 +0300 |
| commit | c029afad9f05609faea295c8ba76996f9a07fbef (patch) | |
| tree | 74bd8c20f7089ff6aa5a8a9535d58bd16c422450 /yql/essentials/sql/v1 | |
| parent | 89ffb9c4ebdd8a2eedfbf7a2f4778dfb6ef50161 (diff) | |
fixed parsing for BEGIN / END in streaming queries
Добавлена возможность отключения отложенного применения '\\n' и '\\r' в TextWalker, чтобы позиции, генерируемые им, были в точности равны позициям токенов от лексера antlr4:
<https://nda.ya.ru/t/hmKq_iWN7JVCGe>
commit_hash:15049d23b9ac1232b9e1d281d86d6b51d5822f85
Diffstat (limited to 'yql/essentials/sql/v1')
| -rw-r--r-- | yql/essentials/sql/v1/sql_translation.cpp | 42 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_translation.h | 2 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_ut.cpp | 1 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_ut_antlr4.cpp | 1 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_ut_common.h | 118 |
5 files changed, 135 insertions, 29 deletions
diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp index e3ff7464795..635146f77cf 100644 --- a/yql/essentials/sql/v1/sql_translation.cpp +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -5270,27 +5270,6 @@ bool TSqlTranslation::ParseViewQuery( namespace { -static std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4) { - if (1 == token.GetLine() && 0 == token.GetColumn()) { - return 0; - } - - TPosition pos = {0, 1}; - TTextWalker walker(pos, antlr4); - - std::string::size_type position = 0; - for (char c : query) { - walker.Advance(c); - ++position; - - if (pos.Row == token.GetLine() && pos.Column == token.GetColumn()) { - return position; - } - } - - return std::string::npos; -} - static TString GetLambdaText(TTranslation& ctx, TContext& Ctx, const TRule_lambda_or_parameter& lambdaOrParameter) { static const TString statementSeparator = ";\n"; @@ -5343,6 +5322,27 @@ static TString GetLambdaText(TTranslation& ctx, TContext& Ctx, const TRule_lambd } } +} // anonymous namespace + +std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4) { + if (1 == token.GetLine() && 0 == token.GetColumn()) { + return 0; + } + + TPosition pos = {0, 1}; + TTextWalker walker(pos, antlr4); + + std::string::size_type position = 0; + for (char c : query) { + walker.Advance(c); + ++position; + + if (pos.Row == token.GetLine() && pos.Column == token.GetColumn()) { + return position; + } + } + + return std::string::npos; } bool TSqlTranslation::ParseTransferLambda( diff --git a/yql/essentials/sql/v1/sql_translation.h b/yql/essentials/sql/v1/sql_translation.h index c17a2d43a48..976846a2f3b 100644 --- a/yql/essentials/sql/v1/sql_translation.h +++ b/yql/essentials/sql/v1/sql_translation.h @@ -354,4 +354,6 @@ TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& patter bool ParseNumbers(TContext& ctx, const 
TString& strOrig, ui64& value, TString& suffix); +std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4); + } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp index 66c9a159fbe..c29c49ee59a 100644 --- a/yql/essentials/sql/v1/sql_ut.cpp +++ b/yql/essentials/sql/v1/sql_ut.cpp @@ -1,4 +1,5 @@ #include "sql_ut.h" +#include "sql_translation.h" #include "format/sql_format.h" #include "lexer/lexer.h" diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp index 18480d048cd..bc29c949ddd 100644 --- a/yql/essentials/sql/v1/sql_ut_antlr4.cpp +++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp @@ -1,4 +1,5 @@ #include "sql_ut_antlr4.h" +#include "sql_translation.h" #include "format/sql_format.h" #include "lexer/lexer.h" diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h index d961a33ea6c..b6ae6ead2ec 100644 --- a/yql/essentials/sql/v1/sql_ut_common.h +++ b/yql/essentials/sql/v1/sql_ut_common.h @@ -9393,18 +9393,51 @@ USE hahn; UNIT_ASSERT_VALUES_EQUAL(1, elementStat["__query_text"]); } + Y_UNIT_TEST(CreateStreamingQueryCrlfCheck) { + NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql( +USE plato; +-- Some comment +CREATE STREAMING QUERY MyQuery AS DO )sql" << "\r" << R"sql(BEGIN +USE plato; +$source = SELECT * FROM Input; +INSERT INTO Output1 SELECT * FROM $source; +INSERT INTO Output2 SELECT * FROM $source; +END DO; +USE hahn; +-- Other comment + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "createObject") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_ast" (block '()#"); + } + + if (word == "__query_text") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM 
$source;\nINSERT INTO Output2 SELECT * FROM $source;\n")))#"); + } + }; + + TWordCountHive elementStat = { {TString("createObject"), 0}, {TString("__query_text"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["createObject"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["__query_text"]); + } + Y_UNIT_TEST(CreateStreamingQueryWithSettings) { - NYql::TAstParseResult res = SqlToYql(R"sql( + NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql( USE plato; -- Some comment CREATE STREAMING QUERY MyQuery WITH ( RUN = TRUE, RESOURCE_POOL = my_pool -) AS DO BEGIN +) AS DO )sql" << "\r" << R"sql(BEGIN USE plato; $source = SELECT * FROM Input; INSERT INTO Output1 SELECT * FROM $source; -INSERT INTO Output2 SELECT * FROM $source;END DO; +INSERT INTO Output2 SELECT * FROM $source; +END DO; USE hahn; -- Other comment )sql"); @@ -9416,7 +9449,7 @@ USE hahn; } if (word == "__query_text") { - UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;") '('"resource_pool" '"my_pool") '('"run" (Bool '"true")))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n") '('"resource_pool" '"my_pool") '('"run" (Bool '"true")))#"); } }; @@ -9548,14 +9581,15 @@ USE hahn; } Y_UNIT_TEST(AlterStreamingQuerySetQuery) { - NYql::TAstParseResult res = SqlToYql(R"sql( + NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql( USE plato; -- Some comment -ALTER STREAMING QUERY MyQuery AS DO BEGIN +ALTER STREAMING QUERY MyQuery AS DO )sql" << "\r" << R"sql(BEGIN USE plato; $source = SELECT * FROM Input; INSERT INTO Output1 SELECT * FROM $source; -INSERT INTO Output2 SELECT * FROM $source;END DO; +INSERT INTO Output2 SELECT * FROM $source; +END DO; USE hahn; -- Other 
comment )sql"); @@ -9567,7 +9601,7 @@ USE hahn; } if (word == "__query_text") { - UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;")))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n")))#"); } }; @@ -9756,3 +9790,71 @@ USE hahn; UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); } } + +Y_UNIT_TEST_SUITE(TestGetQueryPosition) { + Y_UNIT_TEST(TestTokenFinding) { + const TString query = TStringBuilder() << R"( +)" << "\r" << R"(BEGIN)" << "\r\n" << R"( + )" << "\n\r" << R"(END +$b = ()" << "\r\r" << R"($x) -> { + +)" << "\n" << R"( +-- comment A +return /*Комментарий*/ $x; +-- Comment B +}; +)"; + + NSQLTranslationV1::TLexers lexers; +#if ANTLR_VER == 3 + bool antlr4 = false; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); +#else + bool antlr4 = true; + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); +#endif + + ui64 lexerPosition = 0; + const auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { + NSQLv1Generated::TToken tokenProto; + tokenProto.SetLine(token.Line); + tokenProto.SetColumn(token.LinePos); + UNIT_ASSERT_VALUES_EQUAL_C(lexerPosition, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4), token.Line << ":" << token.LinePos << ":'" << token.Content << "'"); + + lexerPosition += token.Content.size(); + }; + + const auto lexer = NSQLTranslationV1::MakeLexer(lexers, false, antlr4); + + NYql::TIssues issues; + const bool result = lexer->Tokenize(query, {}, onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS); + UNIT_ASSERT_C(result, issues.ToOneLineString()); + } + + Y_UNIT_TEST(TestTokenMissing) { + const TString query = "BEGIN /*Комментарий*/ \nEND"; + NSQLv1Generated::TToken tokenProto; + +#if ANTLR_VER == 3 + bool 
antlr4 = false; +#else + bool antlr4 = true; +#endif + + tokenProto.SetLine(3); + tokenProto.SetColumn(0); + UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + + tokenProto.SetLine(2); + tokenProto.SetColumn(4); + UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + + tokenProto.SetLine(1); + tokenProto.SetColumn(34); + UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + + tokenProto.SetLine(1); + tokenProto.SetColumn(0); + UNIT_ASSERT_VALUES_EQUAL(0, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + } +} |
