diff options
| author | grigoriypisar <[email protected]> | 2025-09-04 12:04:05 +0300 | 
|---|---|---|
| committer | grigoriypisar <[email protected]> | 2025-09-04 12:26:12 +0300 | 
| commit | c029afad9f05609faea295c8ba76996f9a07fbef (patch) | |
| tree | 74bd8c20f7089ff6aa5a8a9535d58bd16c422450 /yql/essentials | |
| parent | 89ffb9c4ebdd8a2eedfbf7a2f4778dfb6ef50161 (diff) | |
fixed parsing for BEGIN / END in streaming queries
Добавлена возможность отключения отложенного применения '\\n' и '\\r' в TextWalker, чтобы позиции, генерируемые им, были в точности равны позициям токенов от antlr4 лексера:
<https://nda.ya.ru/t/hmKq_iWN7JVCGe>
commit_hash:15049d23b9ac1232b9e1d281d86d6b51d5822f85
Diffstat (limited to 'yql/essentials')
| -rw-r--r-- | yql/essentials/public/issue/yql_issue.cpp | 21 | ||||
| -rw-r--r-- | yql/essentials/public/issue/yql_issue.h | 4 | ||||
| -rw-r--r-- | yql/essentials/public/issue/yql_issue_ut.cpp | 8 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_translation.cpp | 42 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_translation.h | 2 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_ut.cpp | 1 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_ut_antlr4.cpp | 1 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_ut_common.h | 118 | 
8 files changed, 143 insertions, 54 deletions
diff --git a/yql/essentials/public/issue/yql_issue.cpp b/yql/essentials/public/issue/yql_issue.cpp index 08b35ae56c5..02d8bd2e273 100644 --- a/yql/essentials/public/issue/yql_issue.cpp +++ b/yql/essentials/public/issue/yql_issue.cpp @@ -42,14 +42,8 @@ void SanitizeNonAscii(TString& s) {  TTextWalker& TTextWalker::Advance(char c) {      if (c == '\n') { -        HaveCr_ = false; -        ++LfCount_; -        return *this; -    } - - -    if (c == '\r' && !HaveCr_) { -        HaveCr_ = true; +        Position_.Row++; +        Position_.Column = 0;          return *this;      } @@ -58,15 +52,8 @@ TTextWalker& TTextWalker::Advance(char c) {          charDistance = 0;      } -    // either not '\r' or second '\r' -    if (LfCount_) { -        Position_.Row += LfCount_; -        Position_.Column = charDistance; -        LfCount_ = 0; -    } else { -        Position_.Column += charDistance + (HaveCr_ && c != '\r'); -    } -    HaveCr_ = (c == '\r'); +    Position_.Column += charDistance; +      return *this;  } diff --git a/yql/essentials/public/issue/yql_issue.h b/yql/essentials/public/issue/yql_issue.h index ee2b376f0b2..7f354b8670b 100644 --- a/yql/essentials/public/issue/yql_issue.h +++ b/yql/essentials/public/issue/yql_issue.h @@ -58,8 +58,6 @@ public:      TTextWalker(TPosition& position, bool utf8Aware)          : Position_(position)          , Utf8Aware_(utf8Aware) -        , HaveCr_(false) -        , LfCount_(0)      {      } @@ -76,8 +74,6 @@ public:  private:      TPosition& Position_;      const bool Utf8Aware_; -    bool HaveCr_; -    ui32 LfCount_;  };  struct TRange { diff --git a/yql/essentials/public/issue/yql_issue_ut.cpp b/yql/essentials/public/issue/yql_issue_ut.cpp index 551c965ccfe..9076c0ee3da 100644 --- a/yql/essentials/public/issue/yql_issue_ut.cpp +++ b/yql/essentials/public/issue/yql_issue_ut.cpp @@ -49,15 +49,15 @@ Y_UNIT_TEST_SUITE(TextWalkerTest) {          TTextWalker walker(pos, false);          walker.Advance(TStringBuf("a\raa\r")); -      
  UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(4, 1)); +        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(5, 1));          walker.Advance('\n'); -        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(4, 1)); +        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(0, 2));          walker.Advance(TStringBuf("\r\r\ra"));          UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(4, 2));          walker.Advance('\r'); -        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(4, 2)); +        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(5, 2));          walker.Advance('\n'); -        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(4, 2)); +        UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(0, 3));          walker.Advance('a');          UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(1, 3));      } diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp index e3ff7464795..635146f77cf 100644 --- a/yql/essentials/sql/v1/sql_translation.cpp +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -5270,27 +5270,6 @@ bool TSqlTranslation::ParseViewQuery(  namespace { -static std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4) { -    if (1 == token.GetLine() && 0 == token.GetColumn()) { -        return 0; -    } - -    TPosition pos = {0, 1}; -    TTextWalker walker(pos, antlr4); - -    std::string::size_type position = 0; -    for (char c : query) { -        walker.Advance(c); -        ++position; - -        if (pos.Row == token.GetLine() && pos.Column == token.GetColumn()) { -            return position; -        } -    } - -    return std::string::npos; -} -  static TString GetLambdaText(TTranslation& ctx, TContext& Ctx, const TRule_lambda_or_parameter& lambdaOrParameter) {      static const TString statementSeparator = ";\n"; @@ -5343,6 +5322,27 @@ static TString GetLambdaText(TTranslation& ctx, TContext& Ctx, const TRule_lambd      }  } +} // anonymous namespace + +std::string::size_type GetQueryPosition(const TString& query, const 
NSQLv1Generated::TToken& token, bool antlr4) { +    if (1 == token.GetLine() && 0 == token.GetColumn()) { +        return 0; +    } + +    TPosition pos = {0, 1}; +    TTextWalker walker(pos, antlr4); + +    std::string::size_type position = 0; +    for (char c : query) { +        walker.Advance(c); +        ++position; + +        if (pos.Row == token.GetLine() && pos.Column == token.GetColumn()) { +            return position; +        } +    } + +    return std::string::npos;  }  bool TSqlTranslation::ParseTransferLambda( diff --git a/yql/essentials/sql/v1/sql_translation.h b/yql/essentials/sql/v1/sql_translation.h index c17a2d43a48..976846a2f3b 100644 --- a/yql/essentials/sql/v1/sql_translation.h +++ b/yql/essentials/sql/v1/sql_translation.h @@ -354,4 +354,6 @@ TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& patter  bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix); +std::string::size_type GetQueryPosition(const TString& query, const NSQLv1Generated::TToken& token, bool antlr4); +  } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp index 66c9a159fbe..c29c49ee59a 100644 --- a/yql/essentials/sql/v1/sql_ut.cpp +++ b/yql/essentials/sql/v1/sql_ut.cpp @@ -1,4 +1,5 @@  #include "sql_ut.h" +#include "sql_translation.h"  #include "format/sql_format.h"  #include "lexer/lexer.h" diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp index 18480d048cd..bc29c949ddd 100644 --- a/yql/essentials/sql/v1/sql_ut_antlr4.cpp +++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp @@ -1,4 +1,5 @@  #include "sql_ut_antlr4.h" +#include "sql_translation.h"  #include "format/sql_format.h"  #include "lexer/lexer.h" diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h index d961a33ea6c..b6ae6ead2ec 100644 --- a/yql/essentials/sql/v1/sql_ut_common.h +++ b/yql/essentials/sql/v1/sql_ut_common.h @@ 
-9393,18 +9393,51 @@ USE hahn;          UNIT_ASSERT_VALUES_EQUAL(1, elementStat["__query_text"]);      } +    Y_UNIT_TEST(CreateStreamingQueryCrlfCheck) { +        NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql( +USE plato; +-- Some comment +CREATE STREAMING QUERY MyQuery AS DO )sql" << "\r" << R"sql(BEGIN +USE plato; +$source = SELECT * FROM Input; +INSERT INTO Output1 SELECT * FROM $source; +INSERT INTO Output2 SELECT * FROM $source; +END DO; +USE hahn; +-- Other comment +        )sql"); +        UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + +        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { +            if (word == "createObject") { +                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_ast" (block '()#"); +            } + +            if (word == "__query_text") { +                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n")))#"); +            } +        }; + +        TWordCountHive elementStat = { {TString("createObject"), 0}, {TString("__query_text"), 0} }; +        VerifyProgram(res, elementStat, verifyLine); + +        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["createObject"]); +        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["__query_text"]); +    } +      Y_UNIT_TEST(CreateStreamingQueryWithSettings) { -        NYql::TAstParseResult res = SqlToYql(R"sql( +        NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql(  USE plato;  -- Some comment  CREATE STREAMING QUERY MyQuery WITH (      RUN = TRUE,      RESOURCE_POOL = my_pool -) AS DO BEGIN +) AS DO )sql" << "\r" << R"sql(BEGIN  USE plato;  $source = SELECT * FROM Input;  INSERT INTO Output1 SELECT * FROM $source; -INSERT INTO Output2 SELECT * FROM $source;END DO; +INSERT INTO Output2 SELECT * FROM $source; +END DO;  USE hahn;  -- Other comment          )sql"); @@ -9416,7 
+9449,7 @@ USE hahn;              }              if (word == "__query_text") { -                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;") '('"resource_pool" '"my_pool") '('"run" (Bool '"true")))#"); +                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n") '('"resource_pool" '"my_pool") '('"run" (Bool '"true")))#");              }          }; @@ -9548,14 +9581,15 @@ USE hahn;      }      Y_UNIT_TEST(AlterStreamingQuerySetQuery) { -        NYql::TAstParseResult res = SqlToYql(R"sql( +        NYql::TAstParseResult res = SqlToYql(TStringBuilder() << R"sql(  USE plato;  -- Some comment -ALTER STREAMING QUERY MyQuery AS DO BEGIN +ALTER STREAMING QUERY MyQuery AS DO )sql" << "\r" << R"sql(BEGIN  USE plato;  $source = SELECT * FROM Input;  INSERT INTO Output1 SELECT * FROM $source; -INSERT INTO Output2 SELECT * FROM $source;END DO; +INSERT INTO Output2 SELECT * FROM $source; +END DO;  USE hahn;  -- Other comment          )sql"); @@ -9567,7 +9601,7 @@ USE hahn;              }              if (word == "__query_text") { -                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;")))#"); +                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('"__query_text" '"\nUSE plato;\n$source = SELECT * FROM Input;\nINSERT INTO Output1 SELECT * FROM $source;\nINSERT INTO Output2 SELECT * FROM $source;\n")))#");              }          }; @@ -9756,3 +9790,71 @@ USE hahn;          UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);      }  } + +Y_UNIT_TEST_SUITE(TestGetQueryPosition) { +    Y_UNIT_TEST(TestTokenFinding) { +        const TString 
query = TStringBuilder() << R"( +)" << "\r" << R"(BEGIN)" << "\r\n" << R"( +   )" << "\n\r" << R"(END +$b = ()" << "\r\r" << R"($x) -> { + +)" << "\n" << R"( +-- comment A +return /*Комментарий*/ $x; +-- Comment B +}; +)"; + +        NSQLTranslationV1::TLexers lexers; +#if ANTLR_VER == 3 +        bool antlr4 = false; +        lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); +#else +        bool antlr4 = true; +        lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); +#endif + +        ui64 lexerPosition = 0; +        const auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { +            NSQLv1Generated::TToken tokenProto; +            tokenProto.SetLine(token.Line); +            tokenProto.SetColumn(token.LinePos); +            UNIT_ASSERT_VALUES_EQUAL_C(lexerPosition, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4), token.Line << ":" << token.LinePos << ":'" << token.Content << "'"); + +            lexerPosition += token.Content.size(); +        }; + +        const auto lexer = NSQLTranslationV1::MakeLexer(lexers, false, antlr4); + +        NYql::TIssues issues; +        const bool result = lexer->Tokenize(query, {}, onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS); +        UNIT_ASSERT_C(result, issues.ToOneLineString()); +    } + +    Y_UNIT_TEST(TestTokenMissing) { +        const TString query = "BEGIN /*Комментарий*/ \nEND"; +        NSQLv1Generated::TToken tokenProto; + +#if ANTLR_VER == 3 +        bool antlr4 = false; +#else +        bool antlr4 = true; +#endif + +        tokenProto.SetLine(3); +        tokenProto.SetColumn(0); +        UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + +        tokenProto.SetLine(2); +        tokenProto.SetColumn(4); +        UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + +        tokenProto.SetLine(1); +        tokenProto.SetColumn(34); +  
      UNIT_ASSERT_VALUES_EQUAL(std::string::npos, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); + +        tokenProto.SetLine(1); +        tokenProto.SetColumn(0); +        UNIT_ASSERT_VALUES_EQUAL(0, NSQLTranslationV1::GetQueryPosition(query, tokenProto, antlr4)); +    } +}  | 
