diff options
| author | vvvv <[email protected]> | 2025-04-14 22:40:18 +0300 | 
|---|---|---|
| committer | vvvv <[email protected]> | 2025-04-14 22:52:33 +0300 | 
| commit | 87a85d90a3532eba45980e35b0a9a636e35c5dec (patch) | |
| tree | 6268dce29ed56c548e37a5c3cc01a81094acf46c /yql/essentials/sql | |
| parent | 597cd5f306419a1a879a1d616bf428757c485172 (diff) | |
YQL-19616 refactor test lexers from sql2yql, supported facade run tools
commit_hash:fb1727dda2b8c7d2ff42d4436c54cb7aa1ce4bc2
Diffstat (limited to 'yql/essentials/sql')
| -rw-r--r-- | yql/essentials/sql/v1/lexer/check/check_lexers.cpp | 82 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/lexer/check/check_lexers.h | 9 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/lexer/check/ya.make | 21 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/lexer/ya.make | 1 | 
4 files changed, 113 insertions, 0 deletions
| diff --git a/yql/essentials/sql/v1/lexer/check/check_lexers.cpp b/yql/essentials/sql/v1/lexer/check/check_lexers.cpp new file mode 100644 index 00000000000..d0cccefdc79 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/check/check_lexers.cpp @@ -0,0 +1,82 @@ +#include "check_lexers.h" + + +#include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/v1/lexer/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h> +#include <yql/essentials/sql/v1/lexer/regex/lexer.h> +#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h> +#include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h> +#include <yql/essentials/core/issue/yql_issue.h> + +#include <util/string/builder.h> + +namespace NSQLTranslationV1 { + +bool CheckLexers(NYql::TPosition pos, const TString& query, NYql::TIssues& issues) { +    NSQLTranslationV1::TLexers lexers; +    NSQLTranslation::TTranslationSettings settings; +    if (!NSQLTranslation::ParseTranslationSettings(query, settings, issues)) { +        return false; +    } + +    lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); +    lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(); +    lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); +    lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory(); +    auto lexerMain = NSQLTranslationV1::MakeLexer(lexers, settings.AnsiLexer, true, NSQLTranslationV1::ELexerFlavor::Default); +    auto lexerPure = NSQLTranslationV1::MakeLexer(lexers, settings.AnsiLexer, true, NSQLTranslationV1::ELexerFlavor::Pure); +    auto lexerRegex = NSQLTranslationV1::MakeRegexLexerFactory(settings.AnsiLexer)->MakeLexer(); +    TVector<NSQLTranslation::TParsedToken> mainTokens; +    if (!lexerMain->Tokenize(query, "", [&](auto token) { mainTokens.push_back(token);}, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { +        return false; +    } + +    TVector<NSQLTranslation::TParsedToken> pureTokens; +    if (!lexerPure->Tokenize(query, "", [&](auto token) { pureTokens.push_back(token);}, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { +        return false; +    } + +    TVector<NSQLTranslation::TParsedToken> regexTokens; +    if (!lexerRegex->Tokenize(query, "", [&](auto token) { regexTokens.push_back(token);}, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { +        return false; +    } + +    bool hasErrors = false; +    auto check = [&](const char* name, const TVector<NSQLTranslation::TParsedToken>& otherTokens) { +        if (mainTokens.size() != otherTokens.size()) { +            hasErrors = true; +            issues.AddIssue(NYql::TIssue(pos, TStringBuilder () << "Mismatch token count, main: " << +                mainTokens.size() << ", " << name << ": " << otherTokens.size() << "\n")); +        } + +        TStringBuilder textBuilder; + +        for (size_t i = 0; i < Min(mainTokens.size(), otherTokens.size()); ++i) { +            if (mainTokens[i].Name != otherTokens[i].Name || mainTokens[i].Content != otherTokens[i].Content) { +                hasErrors = true; +                TStringBuilder err; +                err << "Mismatch token #" << i << ", main: " << mainTokens[i].Name << ":" << mainTokens[i].Content +                    << ", " << name << ": " << otherTokens[i].Name << ":" << otherTokens[i].Content << "\n"; +                err << "Text sample: ["; +                TString text = textBuilder; +                constexpr size_t LexerContextSample = 50; +                err << text.substr(text.size() >= LexerContextSample ? text.size() - LexerContextSample : 0u, LexerContextSample); +                err << "]\n"; +                issues.AddIssue(NYql::TIssue(pos, err)); +                break; +            } + +            textBuilder << mainTokens[i].Content; +        } +    }; + +    check("pure", pureTokens); +    check("regex", regexTokens); +    return !hasErrors; +} + +} diff --git a/yql/essentials/sql/v1/lexer/check/check_lexers.h b/yql/essentials/sql/v1/lexer/check/check_lexers.h new file mode 100644 index 00000000000..0fceaa2e0bd --- /dev/null +++ b/yql/essentials/sql/v1/lexer/check/check_lexers.h @@ -0,0 +1,9 @@ +#pragma once +#include <yql/essentials/core/issue/yql_issue.h> +#include <util/generic/string.h> + +namespace NSQLTranslationV1 { + +bool CheckLexers(NYql::TPosition pos, const TString& query, NYql::TIssues& issues); + +} diff --git a/yql/essentials/sql/v1/lexer/check/ya.make b/yql/essentials/sql/v1/lexer/check/ya.make new file mode 100644 index 00000000000..dc4ac21836d --- /dev/null +++ b/yql/essentials/sql/v1/lexer/check/ya.make @@ -0,0 +1,21 @@ +LIBRARY() + +SRCS( +    check_lexers.h +    check_lexers.cpp +) + +PEERDIR( +    yql/essentials/core/issue +    yql/essentials/sql/settings +    yql/essentials/sql/v1/lexer +    yql/essentials/sql/v1/lexer/antlr4 +    yql/essentials/sql/v1/lexer/antlr4_ansi +    yql/essentials/sql/v1/lexer/antlr4_pure +    yql/essentials/sql/v1/lexer/antlr4_pure_ansi +    yql/essentials/sql/v1/lexer/regex +    yql/essentials/sql/v1/proto_parser/antlr4 +    yql/essentials/sql/v1/proto_parser/antlr4_ansi +) + +END() diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make index 66c0c87f15f..6462ced1991 100644 --- a/yql/essentials/sql/v1/lexer/ya.make +++ b/yql/essentials/sql/v1/lexer/ya.make @@ -22,6 +22,7 @@ RECURSE(      antlr4_ansi      antlr4_pure      antlr4_pure_ansi +    check      regex  ) | 
