summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql
diff options
context:
space:
mode:
authorvvvv <[email protected]>2025-04-14 22:40:18 +0300
committervvvv <[email protected]>2025-04-14 22:52:33 +0300
commit87a85d90a3532eba45980e35b0a9a636e35c5dec (patch)
tree6268dce29ed56c548e37a5c3cc01a81094acf46c /yql/essentials/sql
parent597cd5f306419a1a879a1d616bf428757c485172 (diff)
YQL-19616 refactor test lexers from sql2yql, supported facade run tools
commit_hash:fb1727dda2b8c7d2ff42d4436c54cb7aa1ce4bc2
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r--yql/essentials/sql/v1/lexer/check/check_lexers.cpp82
-rw-r--r--yql/essentials/sql/v1/lexer/check/check_lexers.h9
-rw-r--r--yql/essentials/sql/v1/lexer/check/ya.make21
-rw-r--r--yql/essentials/sql/v1/lexer/ya.make1
4 files changed, 113 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/lexer/check/check_lexers.cpp b/yql/essentials/sql/v1/lexer/check/check_lexers.cpp
new file mode 100644
index 00000000000..d0cccefdc79
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/check/check_lexers.cpp
@@ -0,0 +1,82 @@
+#include "check_lexers.h"
+
+
+#include <yql/essentials/sql/settings/translation_settings.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
+#include <yql/essentials/sql/v1/lexer/regex/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
+#include <yql/essentials/core/issue/yql_issue.h>
+
+#include <util/string/builder.h>
+
+namespace NSQLTranslationV1 {
+
+// Cross-checks the lexer implementations against each other on `query`.
+// The default ("main"), pure and regex lexers each tokenize the query; the pure
+// and regex token streams are then compared with the main one by name and content.
+// `issues` is handed to settings parsing and to every lexer, and also receives the
+// mismatch reports (attached to `pos`). Returns true only when nothing failed.
+bool CheckLexers(NYql::TPosition pos, const TString& query, NYql::TIssues& issues) {
+    using TTokens = TVector<NSQLTranslation::TParsedToken>;
+
+    NSQLTranslation::TTranslationSettings settings;
+    if (!NSQLTranslation::ParseTranslationSettings(query, settings, issues)) {
+        return false;
+    }
+
+    NSQLTranslationV1::TLexers lexers;
+    lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+    lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
+    lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+    lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory();
+
+    auto mainLexer = NSQLTranslationV1::MakeLexer(lexers, settings.AnsiLexer, true, NSQLTranslationV1::ELexerFlavor::Default);
+    auto pureLexer = NSQLTranslationV1::MakeLexer(lexers, settings.AnsiLexer, true, NSQLTranslationV1::ELexerFlavor::Pure);
+    auto regexLexer = NSQLTranslationV1::MakeRegexLexerFactory(settings.AnsiLexer)->MakeLexer();
+
+    // Runs one lexer over the query and collects the tokens it emits into `out`.
+    const auto collect = [&](auto& lexer, TTokens& out) {
+        return lexer->Tokenize(query, "", [&out](auto token) { out.push_back(token); },
+            issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS);
+    };
+
+    TTokens mainTokens;
+    TTokens pureTokens;
+    TTokens regexTokens;
+    // Evaluated in the order main, pure, regex; the first failing lexer stops the check.
+    if (!collect(mainLexer, mainTokens) ||
+        !collect(pureLexer, pureTokens) ||
+        !collect(regexLexer, regexTokens)) {
+        return false;
+    }
+
+    bool allMatch = true;
+    // Compares `other` with the main tokens: reports a size difference (if any)
+    // and the first position where name or content differs.
+    const auto compareWithMain = [&](const char* name, const TTokens& other) {
+        if (other.size() != mainTokens.size()) {
+            allMatch = false;
+            issues.AddIssue(NYql::TIssue(pos, TStringBuilder() << "Mismatch token count, main: " <<
+                mainTokens.size() << ", " << name << ": " << other.size() << "\n"));
+        }
+
+        constexpr size_t LexerContextSample = 50;
+        const size_t common = Min(mainTokens.size(), other.size());
+        TStringBuilder consumed; // Content of the main tokens that matched so far.
+        for (size_t idx = 0; idx < common; ++idx) {
+            const auto& expected = mainTokens[idx];
+            const auto& actual = other[idx];
+            if (expected.Name == actual.Name && expected.Content == actual.Content) {
+                consumed << expected.Content;
+                continue;
+            }
+
+            allMatch = false;
+            const TString text = consumed;
+            // The sample is the tail (at most LexerContextSample chars) of the text before the mismatch.
+            const size_t from = text.size() >= LexerContextSample ? text.size() - LexerContextSample : 0u;
+            TStringBuilder err;
+            err << "Mismatch token #" << idx << ", main: " << expected.Name << ":" << expected.Content
+                << ", " << name << ": " << actual.Name << ":" << actual.Content << "\n";
+            err << "Text sample: [" << text.substr(from, LexerContextSample) << "]\n";
+            issues.AddIssue(NYql::TIssue(pos, err));
+            break;
+        }
+    };
+
+    compareWithMain("pure", pureTokens);
+    compareWithMain("regex", regexTokens);
+    return allMatch;
+}
+
+}
diff --git a/yql/essentials/sql/v1/lexer/check/check_lexers.h b/yql/essentials/sql/v1/lexer/check/check_lexers.h
new file mode 100644
index 00000000000..0fceaa2e0bd
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/check/check_lexers.h
@@ -0,0 +1,9 @@
+#pragma once
+#include <yql/essentials/core/issue/yql_issue.h>
+#include <util/generic/string.h>
+
+namespace NSQLTranslationV1 {
+// Tokenizes `query` with the default, pure and regex lexers; returns true only if settings parsing and all lexers succeed and the pure/regex tokens match the default ones (problems go to `issues`, mismatches at `pos`).
+bool CheckLexers(NYql::TPosition pos, const TString& query, NYql::TIssues& issues);
+
+}
diff --git a/yql/essentials/sql/v1/lexer/check/ya.make b/yql/essentials/sql/v1/lexer/check/ya.make
new file mode 100644
index 00000000000..dc4ac21836d
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/check/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+SRCS(
+ check_lexers.h
+ check_lexers.cpp
+)
+
+PEERDIR(
+ yql/essentials/core/issue
+ yql/essentials/sql/settings
+ yql/essentials/sql/v1/lexer
+ yql/essentials/sql/v1/lexer/antlr4
+ yql/essentials/sql/v1/lexer/antlr4_ansi
+ yql/essentials/sql/v1/lexer/antlr4_pure
+ yql/essentials/sql/v1/lexer/antlr4_pure_ansi
+ yql/essentials/sql/v1/lexer/regex
+ yql/essentials/sql/v1/proto_parser/antlr4
+ yql/essentials/sql/v1/proto_parser/antlr4_ansi
+)
+
+END()
diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make
index 66c0c87f15f..6462ced1991 100644
--- a/yql/essentials/sql/v1/lexer/ya.make
+++ b/yql/essentials/sql/v1/lexer/ya.make
@@ -22,6 +22,7 @@ RECURSE(
antlr4_ansi
antlr4_pure
antlr4_pure_ansi
+ check
regex
)