diff options
author | vvvv <[email protected]> | 2025-02-19 17:28:26 +0300 |
---|---|---|
committer | vvvv <[email protected]> | 2025-02-19 17:46:38 +0300 |
commit | 52daccf61e2e827114cfb3372071cddaec7974ba (patch) | |
tree | 07404f6ec241c8a2b96a4da07dc0f0d0c247799c /yql/essentials/sql | |
parent | ca4b5f28703b0d06599c94f3eacc5d6c498c31d5 (diff) |
YQL-19594 Explicit lexers & parsers
commit_hash:6be543b7c5bff6ee474ee606c920197fb2569767
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- | yql/essentials/sql/v1/context.cpp | 12 | ||||
-rw-r--r-- | yql/essentials/sql/v1/context.h | 18 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/sql_format.cpp | 38 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/sql_format.h | 12 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/sql_format_ut.cpp | 23 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/sql_format_ut.h | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp | 23 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/ut/ya.make | 7 | ||||
-rw-r--r-- | yql/essentials/sql/v1/format/ut_antlr4/ya.make | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer_ut.cpp | 33 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/ut/ya.make | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql.cpp | 64 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql.h | 9 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_translation.cpp | 5 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_ut.cpp | 5 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_ut_antlr4.cpp | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/ut/ya.make | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/ut_antlr4/ya.make | 1 |
18 files changed, 212 insertions, 57 deletions
diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp index d28c31469b4..4082757671b 100644 --- a/yql/essentials/sql/v1/context.cpp +++ b/yql/essentials/sql/v1/context.cpp @@ -82,10 +82,20 @@ THashMap<TStringBuf, TPragmaMaybeField> CTX_PRAGMA_MAYBE_FIELDS = { } // namespace TContext::TContext(const NSQLTranslation::TTranslationSettings& settings, + const NSQLTranslation::TSQLHints& hints, + NYql::TIssues& issues, + const TString& query) + : TContext(MakeAllLexers(), MakeAllParsers(), settings, hints, issues, query) +{} + +TContext::TContext(const TLexers& lexers, const TParsers& parsers, + const NSQLTranslation::TTranslationSettings& settings, const NSQLTranslation::TSQLHints& hints, TIssues& issues, const TString& query) - : ClusterMapping(settings.ClusterMapping) + : Lexers(lexers) + , Parsers(parsers) + , ClusterMapping(settings.ClusterMapping) , PathPrefix(settings.PathPrefix) , ClusterPathPrefixes(settings.ClusterPathPrefixes) , SQLHints(hints) diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h index c0f45e8fa9b..7c49529a78e 100644 --- a/yql/essentials/sql/v1/context.h +++ b/yql/essentials/sql/v1/context.h @@ -92,10 +92,18 @@ namespace NSQLTranslationV1 { class TContext { public: + //FIXME remove TContext(const NSQLTranslation::TTranslationSettings& settings, - const NSQLTranslation::TSQLHints& hints, - NYql::TIssues& issues, - const TString& query = {}); + const NSQLTranslation::TSQLHints& hints, + NYql::TIssues& issues, + const TString& query = {}); + + TContext(const TLexers& lexers, + const TParsers& parsers, + const NSQLTranslation::TTranslationSettings& settings, + const NSQLTranslation::TSQLHints& hints, + NYql::TIssues& issues, + const TString& query = {}); virtual ~TContext(); @@ -250,6 +258,10 @@ namespace NSQLTranslationV1 { private: IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos); + public: + const TLexers Lexers; + const TParsers Parsers; + private: NYql::TPosition Position; THolder<TStringOutput> IssueMsgHolder; diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp index c6a9aa1cb78..cfb6fcde71d 100644 --- a/yql/essentials/sql/v1/format/sql_format.cpp +++ b/yql/essentials/sql/v1/format/sql_format.cpp @@ -3,9 +3,6 @@ #include <yql/essentials/parser/lexer_common/lexer.h> #include <yql/essentials/core/sql_types/simple_types.h> -#include <yql/essentials/sql/v1/lexer/lexer.h> -#include <yql/essentials/sql/v1/proto_parser/proto_parser.h> - #include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> #include <library/cpp/protobuf/util/simple_reflection.h> @@ -3110,8 +3107,12 @@ TStaticData::TStaticData() class TSqlFormatter : public NSQLFormat::ISqlFormatter { public: - TSqlFormatter(const NSQLTranslation::TTranslationSettings& settings) - : Settings(settings) + TSqlFormatter(const NSQLTranslationV1::TLexers& lexers, + const NSQLTranslationV1::TParsers& parsers, + const NSQLTranslation::TTranslationSettings& settings) + : Lexers(lexers) + , Parsers(parsers) + , Settings(settings) {} bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues, EFormatMode mode) override { @@ -3126,7 +3127,7 @@ public: } if (mode == EFormatMode::Obfuscate) { - auto message = NSQLTranslationV1::SqlAST(query, parsedSettings.File, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena); + auto message = NSQLTranslationV1::SqlAST(Parsers, query, parsedSettings.File, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.Arena); if (!message) { return false; } @@ -3135,7 +3136,7 @@ public: return Format(visitor.Process(*message), formattedQuery, issues, EFormatMode::Pretty); } - auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); + auto lexer = NSQLTranslationV1::MakeLexer(Lexers, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); TVector<TString> statements; if (!NSQLTranslationV1::SplitQueryToStatements(query, lexer, statements, issues, parsedSettings.File)) { return false; @@ -3161,7 +3162,7 @@ public: } NYql::TIssues parserIssues; - auto message = NSQLTranslationV1::SqlAST(currentQuery, parsedSettings.File, parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena); + auto message = NSQLTranslationV1::SqlAST(Parsers, currentQuery, parsedSettings.File, parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.Arena); if (!message) { finalFormattedQuery << currentQuery; if (!currentQuery.EndsWith("\n")) { @@ -3209,23 +3210,32 @@ public: } private: + const NSQLTranslationV1::TLexers Lexers; + const NSQLTranslationV1::TParsers Parsers; const NSQLTranslation::TTranslationSettings Settings; }; } +ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslationV1::TLexers& lexers, + const NSQLTranslationV1::TParsers& parsers, + const NSQLTranslation::TTranslationSettings& settings) { + return ISqlFormatter::TPtr(new TSqlFormatter(lexers, parsers, settings)); +} + ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings) { - return ISqlFormatter::TPtr(new TSqlFormatter(settings)); + return MakeSqlFormatter(NSQLTranslationV1::MakeAllLexers(), NSQLTranslationV1::MakeAllParsers(), settings); } -TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings) { +TString MutateQuery(const NSQLTranslationV1::TLexers& lexers, + const TString& query, const NSQLTranslation::TTranslationSettings& settings) { auto parsedSettings = settings; NYql::TIssues issues; if (!NSQLTranslation::ParseTranslationSettings(query, parsedSettings, issues)) { throw yexception() << issues.ToString(); } - auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); + auto lexer = NSQLTranslationV1::MakeLexer(lexers, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); TVector<NSQLTranslation::TParsedToken> allTokens; auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { if (token.Name != "EOF") { @@ -3248,13 +3258,15 @@ TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSet return newQueryBuilder; } -bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error) { +bool SqlFormatSimple(const NSQLTranslationV1::TLexers& lexers, + const NSQLTranslationV1::TParsers& parsers, + const TString& query, TString& formattedQuery, TString& error) { try { google::protobuf::Arena arena; NSQLTranslation::TTranslationSettings settings; settings.Arena = &arena; - auto formatter = MakeSqlFormatter(settings); + auto formatter = MakeSqlFormatter(lexers, parsers, settings); NYql::TIssues issues; const bool result = formatter->Format(query, formattedQuery, issues); if (!result) { diff --git a/yql/essentials/sql/v1/format/sql_format.h b/yql/essentials/sql/v1/format/sql_format.h index 6944a730710..3233f2031b8 100644 --- a/yql/essentials/sql/v1/format/sql_format.h +++ b/yql/essentials/sql/v1/format/sql_format.h @@ -2,6 +2,8 @@ #include <yql/essentials/public/issue/yql_issue.h> #include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/v1/lexer/lexer.h> +#include <yql/essentials/sql/v1/proto_parser/proto_parser.h> #include <util/generic/string.h> @@ -23,12 +25,18 @@ public: virtual ~ISqlFormatter() = default; }; +//FIXME remove ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings = {}); +ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslationV1::TLexers& lexers, + const NSQLTranslationV1::TParsers& parsers, + const NSQLTranslation::TTranslationSettings& settings = {}); + // insert spaces and comments between each tokens -TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings = {}); +TString MutateQuery(const NSQLTranslationV1::TLexers& lexers, const TString& query, const NSQLTranslation::TTranslationSettings& settings = {}); -bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error); +bool SqlFormatSimple(const NSQLTranslationV1::TLexers& lexers, + const NSQLTranslationV1::TParsers& parsers, const TString& query, TString& formattedQuery, TString& error); THashSet<TString> GetKeywords(); diff --git a/yql/essentials/sql/v1/format/sql_format_ut.cpp b/yql/essentials/sql/v1/format/sql_format_ut.cpp index e721f1fae6e..8714ee2d7c9 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.cpp +++ b/yql/essentials/sql/v1/format/sql_format_ut.cpp @@ -1,6 +1,10 @@ #include <library/cpp/testing/unittest/registar.h> #include "sql_format.h" +#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h> +#include <yql/essentials/sql/v1/proto_parser/antlr3/proto_parser.h> +#include <yql/essentials/sql/v1/proto_parser/antlr3_ansi/proto_parser.h> #include <google/protobuf/arena.h> #include <util/string/subst.h> @@ -12,13 +16,25 @@ using TCases = TVector<std::pair<TString, TString>>; struct TSetup { TSetup(bool ansiLexer = false) { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + lexers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiLexerFactory(); + NSQLTranslationV1::TParsers parsers; + parsers.Antlr3 = NSQLTranslationV1::MakeAntlr3ParserFactory(); + parsers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiParserFactory(); + NSQLTranslation::TTranslationSettings settings; + settings.Antlr4Parser = false; settings.Arena = &Arena; settings.AnsiLexer = ansiLexer; - Formatter = NSQLFormat::MakeSqlFormatter(settings); + Formatter = NSQLFormat::MakeSqlFormatter(lexers, parsers, settings); } void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + lexers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiLexerFactory(); + for (const auto& c : cases) { NYql::TIssues issues; TString formatted; @@ -33,8 +49,11 @@ struct TSetup { UNIT_ASSERT_C(res2, issues.ToString()); UNIT_ASSERT_NO_DIFF(formatted, formatted2); + if (mode == NSQLFormat::EFormatMode::Pretty) { - auto mutatedQuery = NSQLFormat::MutateQuery(c.first); + NSQLTranslation::TTranslationSettings settings; + settings.Antlr4Parser = false; + auto mutatedQuery = NSQLFormat::MutateQuery(lexers, c.first, settings); auto res3 = Formatter->Format(mutatedQuery, formatted, issues); UNIT_ASSERT_C(res3, issues.ToString()); } diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h index ac4a305d981..d92393008f0 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.h +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -1856,6 +1856,6 @@ Y_UNIT_TEST(ValueConstructor) { "SELECT\n\tCallable(Callable<(Int32) -> Int32>, ($x) -> ($x))(0)\n;\n"}, }; - TSetup setup(/* ansiLexer = */ true); + TSetup setup; setup.Run(cases); } diff --git a/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp index 12e80587eac..9e5be312a7f 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp +++ b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp @@ -2,6 +2,12 @@ #include "sql_format.h" +#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h> +#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h> +#include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h> + + #include <google/protobuf/arena.h> #include <util/string/subst.h> #include <util/string/join.h> @@ -12,14 +18,25 @@ using TCases = TVector<std::pair<TString, TString>>; struct TSetup { TSetup(bool ansiLexer = false) { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(); + NSQLTranslationV1::TParsers parsers; + parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory(); + parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory(); + NSQLTranslation::TTranslationSettings settings; settings.Arena = &Arena; settings.Antlr4Parser = true; settings.AnsiLexer = ansiLexer; - Formatter = NSQLFormat::MakeSqlFormatter(settings); + Formatter = NSQLFormat::MakeSqlFormatter(lexers, parsers, settings); } void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory(); + for (const auto& c : cases) { NYql::TIssues issues; TString formatted; @@ -35,7 +52,9 @@ struct TSetup { UNIT_ASSERT_NO_DIFF(formatted, formatted2); if (mode == NSQLFormat::EFormatMode::Pretty) { - auto mutatedQuery = NSQLFormat::MutateQuery(c.first); + NSQLTranslation::TTranslationSettings settings; + settings.Antlr4Parser = true; + auto mutatedQuery = NSQLFormat::MutateQuery(lexers, c.first, settings); auto res3 = Formatter->Format(mutatedQuery, formatted, issues); UNIT_ASSERT_C(res3, issues.ToString()); } diff --git a/yql/essentials/sql/v1/format/ut/ya.make b/yql/essentials/sql/v1/format/ut/ya.make index 4c3ef65f965..690b13b6180 100644 --- a/yql/essentials/sql/v1/format/ut/ya.make +++ b/yql/essentials/sql/v1/format/ut/ya.make @@ -4,4 +4,11 @@ SRCS( sql_format_ut.cpp ) +PEERDIR( + yql/essentials/sql/v1/lexer/antlr3 + yql/essentials/sql/v1/lexer/antlr3_ansi + yql/essentials/sql/v1/proto_parser/antlr3 + yql/essentials/sql/v1/proto_parser/antlr3_ansi +) + END() diff --git a/yql/essentials/sql/v1/format/ut_antlr4/ya.make b/yql/essentials/sql/v1/format/ut_antlr4/ya.make index a0f9d710a8c..13b0f72a949 100644 --- a/yql/essentials/sql/v1/format/ut_antlr4/ya.make +++ b/yql/essentials/sql/v1/format/ut_antlr4/ya.make @@ -4,4 +4,12 @@ SRCS( sql_format_ut_antlr4.cpp ) +PEERDIR( + yql/essentials/sql/v1/lexer/antlr4 + yql/essentials/sql/v1/lexer/antlr4_ansi + yql/essentials/sql/v1/proto_parser/antlr4 + yql/essentials/sql/v1/proto_parser/antlr4_ansi +) + + END() diff --git a/yql/essentials/sql/v1/lexer/lexer_ut.cpp b/yql/essentials/sql/v1/lexer/lexer_ut.cpp index 7dc84162890..2f0c8bb8e2b 100644 --- a/yql/essentials/sql/v1/lexer/lexer_ut.cpp +++ b/yql/essentials/sql/v1/lexer/lexer_ut.cpp @@ -2,6 +2,8 @@ #include <yql/essentials/core/issue/yql_issue.h> #include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> #include <library/cpp/testing/unittest/registar.h> @@ -74,8 +76,12 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { "\"select\"select", }; - auto lexer3 = MakeLexer(/* ansi = */ false, /* antlr4 = */ false); - auto lexer4 = MakeLexer(/* ansi = */ false, /* antlr4 = */ true); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + + auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); + auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); for (const auto& query : queriesUtf8) { auto [tokens3, issues3] = Tokenize(lexer3, query); @@ -89,7 +95,11 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { TVector<TString> InvalidQueries(); void TestInvalidTokensSkipped(bool antlr4, const TVector<TVector<TString>>& expected) { - auto lexer = MakeLexer(/* ansi = */ false, antlr4); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + + auto lexer = MakeLexer(lexers, /* ansi = */ false, antlr4); auto input = InvalidQueries(); UNIT_ASSERT_VALUES_EQUAL(input.size(), expected.size()); @@ -144,8 +154,12 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { } Y_UNIT_TEST(IssuesCollected) { - auto lexer3 = MakeLexer(/* ansi = */ false, /* antlr4 = */ false); - auto lexer4 = MakeLexer(/* ansi = */ false, /* antlr4 = */ true); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + + auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); + auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); for (const auto& query : InvalidQueries()) { auto issues3 = GetIssueMessages(lexer3, query); @@ -157,7 +171,9 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { } Y_UNIT_TEST(IssueMessagesAntlr3) { - auto lexer3 = MakeLexer(/* ansi = */ false, /* antlr4 = */ false); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); auto actual = GetIssueMessages(lexer3, "\xF0\x9F\x98\x8A SELECT * FR"); @@ -172,7 +188,10 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { } Y_UNIT_TEST(IssueMessagesAntlr4) { - auto lexer4 = MakeLexer(/* ansi = */ false, /* antlr4 = */ true); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + + auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); auto actual = GetIssueMessages(lexer4, "\xF0\x9F\x98\x8A SELECT * FR"); diff --git a/yql/essentials/sql/v1/lexer/ut/ya.make b/yql/essentials/sql/v1/lexer/ut/ya.make index a05178ada52..70503c127e8 100644 --- a/yql/essentials/sql/v1/lexer/ut/ya.make +++ b/yql/essentials/sql/v1/lexer/ut/ya.make @@ -3,6 +3,8 @@ UNITTEST_FOR(yql/essentials/sql/v1/lexer) PEERDIR( yql/essentials/core/issue yql/essentials/parser/lexer_common + yql/essentials/sql/v1/lexer/antlr3 + yql/essentials/sql/v1/lexer/antlr4 ) SRCS( diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp index d8558b21ec7..e4c82a0744f 100644 --- a/yql/essentials/sql/v1/sql.cpp +++ b/yql/essentials/sql/v1/sql.cpp @@ -75,35 +75,44 @@ void SqlASTsToYqlsImpl(NYql::TAstParseResult& res, const std::vector<::NSQLv1Gen } } -NYql::TAstParseResult SqlASTToYql(const TString& query, +NYql::TAstParseResult SqlASTToYql(const TLexers& lexers, const TParsers& parsers, + const TString& query, const google::protobuf::Message& protoAst, const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings) { YQL_ENSURE(IsQueryMode(settings.Mode)); TAstParseResult res; - TContext ctx(settings, hints, res.Issues, query); + TContext ctx(lexers, parsers, settings, hints, res.Issues, query); SqlASTToYqlImpl(res, protoAst, ctx); res.ActualSyntaxType = NYql::ESyntaxType::YQLv1; return res; } -NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules) +NYql::TAstParseResult SqlASTToYql(const TString& query, + const google::protobuf::Message& protoAst, + const NSQLTranslation::TSQLHints& hints, + const NSQLTranslation::TTranslationSettings& settings) +{ + return SqlASTToYql(MakeAllLexers(), MakeAllParsers(), query, protoAst, hints, settings); +} + +NYql::TAstParseResult SqlToYql(const TLexers& lexers, const TParsers& parsers, const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules) { TAstParseResult res; const TString queryName = settings.File; NSQLTranslation::TSQLHints hints; - auto lexer = MakeLexer(settings.AnsiLexer, settings.Antlr4Parser); + auto lexer = MakeLexer(lexers, settings.AnsiLexer, settings.Antlr4Parser); YQL_ENSURE(lexer); if (!CollectSqlHints(*lexer, query, queryName, settings.File, hints, res.Issues, settings.MaxErrors, settings.Antlr4Parser)) { return res; } - TContext ctx(settings, hints, res.Issues, query); + TContext ctx(lexers, parsers, settings, hints, res.Issues, query); NSQLTranslation::TErrorCollectorOverIssues collector(res.Issues, settings.MaxErrors, settings.File); - google::protobuf::Message* ast(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena)); + google::protobuf::Message* ast(SqlAST(parsers, query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.Arena)); if (ast) { SqlASTToYqlImpl(res, *ast, ctx); } else { @@ -117,6 +126,10 @@ NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTra return res; } +NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules) { + return SqlToYql(MakeAllLexers(), MakeAllParsers(), query, settings, warningRules); +} + bool NeedUseForAllStatements(const TRule_sql_stmt_core::AltCase& subquery) { switch (subquery) { case TRule_sql_stmt_core::kAltSqlStmtCore1: // pragma @@ -187,7 +200,7 @@ bool NeedUseForAllStatements(const TRule_sql_stmt_core::AltCase& subquery) { } } -TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& queryText, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, +TVector<NYql::TAstParseResult> SqlToAstStatements(const TLexers& lexers, const TParsers& parsers, const TString& queryText, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) { TVector<TAstParseResult> result; @@ -195,16 +208,16 @@ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& queryText, cons TIssues issues; NSQLTranslation::TSQLHints hints; - auto lexer = MakeLexer(settings.AnsiLexer, settings.Antlr4Parser); + auto lexer = MakeLexer(lexers, settings.AnsiLexer, settings.Antlr4Parser); YQL_ENSURE(lexer); if (!CollectSqlHints(*lexer, queryText, queryName, settings.File, hints, issues, settings.MaxErrors, settings.Antlr4Parser)) { return result; } - TContext ctx(settings, hints, issues, queryText); + TContext ctx(lexers, parsers, settings, hints, issues, queryText); NSQLTranslation::TErrorCollectorOverIssues collector(issues, settings.MaxErrors, settings.File); - google::protobuf::Message* astProto(SqlAST(queryText, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena)); + google::protobuf::Message* astProto(SqlAST(parsers, queryText, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.Arena)); if (astProto) { auto ast = static_cast<const TSQLv1ParserAST&>(*astProto); const auto& query = ast.GetRule_sql_query(); @@ -215,7 +228,7 @@ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& queryText, cons if (NeedUseForAllStatements(statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2().Alt_case())) { commonStates.push_back(statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2()); } else { - TContext ctx(settings, hints, issues, queryText); + TContext ctx(lexers, parsers, settings, hints, issues, queryText); result.emplace_back(); if (stmtParseInfo) { stmtParseInfo->push_back({}); @@ -229,7 +242,7 @@ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& queryText, cons commonStates.push_back(block.GetRule_sql_stmt2().GetRule_sql_stmt_core2()); continue; } - TContext ctx(settings, hints, issues, queryText); + TContext ctx(lexers, parsers, settings, hints, issues, queryText); result.emplace_back(); if (stmtParseInfo) { stmtParseInfo->push_back({}); @@ -251,9 +264,13 @@ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& queryText, cons return result; } -bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, NYql::TIssues& issues, +TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) { + return SqlToAstStatements(MakeAllLexers(), MakeAllParsers(), query, settings, warningRules, stmtParseInfo); +} + +bool SplitQueryToStatements(const TLexers& lexers, const TParsers& parsers, const TString& query, TVector<TString>& statements, NYql::TIssues& issues, const NSQLTranslation::TTranslationSettings& settings) { - auto lexer = NSQLTranslationV1::MakeLexer(settings.AnsiLexer, settings.Antlr4Parser); + auto lexer = NSQLTranslationV1::MakeLexer(lexers, settings.AnsiLexer, settings.Antlr4Parser); TVector<TString> parts; if (!SplitQueryToStatements(query, lexer, parts, issues)) { @@ -262,8 +279,8 @@ bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, for (auto& currentQuery : parts) { NYql::TIssues parserIssues; - auto message = NSQLTranslationV1::SqlAST(currentQuery, settings.File, parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, - settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena); + auto message = NSQLTranslationV1::SqlAST(parsers, currentQuery, settings.File, parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, + settings.AnsiLexer, settings.Antlr4Parser, settings.Arena); if (!message) { // Skip empty statements continue; @@ -275,13 +292,17 @@ bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, return true; } +bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, NYql::TIssues& issues, + const NSQLTranslation::TTranslationSettings& settings) { + return SplitQueryToStatements(MakeAllLexers(), MakeAllParsers(), query, statements, issues, settings); +} + class TTranslator : public NSQLTranslation::ITranslator { public: TTranslator(const TLexers& lexers, const TParsers& parsers) : Lexers_(lexers) , Parsers_(parsers) { - Y_UNUSED(Parsers_); } NSQLTranslation::ILexer::TPtr MakeLexer(const NSQLTranslation::TTranslationSettings& settings) final { @@ -291,23 +312,22 @@ public: NYql::TAstParseResult TextToAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, NYql::TStmtParseInfo* stmtParseInfo) final { Y_UNUSED(stmtParseInfo); - return SqlToYql(query, settings, warningRules); + return SqlToYql(Lexers_, Parsers_, query, settings, warningRules); } google::protobuf::Message* TextToMessage(const TString& query, const TString& queryName, NYql::TIssues& issues, size_t maxErrors, const NSQLTranslation::TTranslationSettings& settings) final { - return SqlAST(query, queryName, issues, maxErrors, settings.AnsiLexer, settings.Antlr4Parser, - settings.TestAntlr4, settings.Arena); + return SqlAST(Parsers_, query, queryName, issues, maxErrors, settings.AnsiLexer, settings.Antlr4Parser, settings.Arena); } NYql::TAstParseResult TextAndMessageToAst(const TString& query, const google::protobuf::Message& protoAst, const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings) final { - return SqlASTToYql(query, protoAst, hints, settings); + return SqlASTToYql(Lexers_, Parsers_, query, protoAst, hints, settings); } TVector<NYql::TAstParseResult> TextToManyAst(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo) final { - return SqlToAstStatements(query, settings, warningRules, stmtParseInfo); + return SqlToAstStatements(Lexers_, Parsers_, query, settings, warningRules, stmtParseInfo); } private: diff --git a/yql/essentials/sql/v1/sql.h b/yql/essentials/sql/v1/sql.h index b1aaf500012..5ded2321f32 100644 --- a/yql/essentials/sql/v1/sql.h +++ b/yql/essentials/sql/v1/sql.h @@ -20,16 +20,25 @@ namespace NSQLTranslation { namespace NSQLTranslationV1 { + //FIXME remove NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules = nullptr); + NYql::TAstParseResult SqlToYql(const TLexers& lexers, const TParsers& parsers, const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules = nullptr); + //FIXME remove NYql::TAstParseResult SqlASTToYql(const TString& query, const google::protobuf::Message& protoAst, const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings); + NYql::TAstParseResult SqlASTToYql(const TLexers& lexers, const TParsers& parsers, const TString& query, const google::protobuf::Message& protoAst, const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings); + //FIXME remove TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo = nullptr); + TVector<NYql::TAstParseResult> SqlToAstStatements(const TLexers& lexers, const TParsers& parsers, const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo = nullptr); bool NeedUseForAllStatements(const NSQLv1Generated::TRule_sql_stmt_core::AltCase& subquery); + //FIXME remove bool SplitQueryToStatements(const TString& query, TVector<TString>& statements, NYql::TIssues& issues, const NSQLTranslation::TTranslationSettings& settings); + bool SplitQueryToStatements(const TLexers& lexers, const TParsers& parsers, const TString& query, TVector<TString>& statements, NYql::TIssues& issues, + const NSQLTranslation::TTranslationSettings& settings); NSQLTranslation::TTranslatorPtr MakeTranslator(); diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp index 1c608978869..f4416bb6c5c 100644 --- a/yql/essentials/sql/v1/sql_translation.cpp +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -64,8 +64,9 @@ bool RecreateContext( const TString queryName = "context recreation query"; const auto* ast = NSQLTranslationV1::SqlAST( + ctx.Parsers, recreationQuery, queryName, ctx.Issues, - settings.MaxErrors, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena + settings.MaxErrors, settings.AnsiLexer, settings.Antlr4Parser, settings.Arena ); if (!ast) { return false; @@ -83,7 +84,7 @@ TNodePtr BuildViewSelect( const TString& contextRecreationQuery ) { TIssues issues; - TContext context(parentContext.Settings, {}, issues, parentContext.Query); + TContext context(parentContext.Lexers, parentContext.Parsers, parentContext.Settings, {}, issues, parentContext.Query); if (!RecreateContext(context, context.Settings, contextRecreationQuery)) { parentContext.Issues.AddIssues(issues); return nullptr; diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp index bdde5cddab9..e8d1f3527b7 100644 --- a/yql/essentials/sql/v1/sql_ut.cpp +++ b/yql/essentials/sql/v1/sql_ut.cpp @@ -5,6 +5,7 @@ #include <yql/essentials/providers/common/provider/yql_provider_names.h> #include <yql/essentials/sql/sql.h> #include <yql/essentials/sql/v1/sql.h> +#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h> #include <util/generic/map.h> #include <library/cpp/testing/unittest/registar.h> @@ -18,7 +19,9 @@ using namespace NSQLTranslation; namespace { TParsedTokenList Tokenize(const TString& query) { - auto lexer = NSQLTranslationV1::MakeLexer(true, false); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); + auto lexer = NSQLTranslationV1::MakeLexer(lexers, false, false); TParsedTokenList tokens; NYql::TIssues issues; UNIT_ASSERT_C(Tokenize(*lexer, query, "Query", tokens, issues, SQL_MAX_PARSER_ERRORS), diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp index 2784f85f0b6..a150a7624ba 100644 --- a/yql/essentials/sql/v1/sql_ut_antlr4.cpp +++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp @@ -4,6 +4,7 @@ #include <yql/essentials/providers/common/provider/yql_provider_names.h> #include <yql/essentials/sql/sql.h> +#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> #include <util/generic/map.h> #include <library/cpp/testing/unittest/registar.h> @@ -17,7 +18,10 @@ using namespace NSQLTranslation; namespace { TParsedTokenList Tokenize(const TString& query) { - auto lexer = NSQLTranslationV1::MakeLexer(true, true); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + + auto lexer = NSQLTranslationV1::MakeLexer(lexers, false, true); TParsedTokenList tokens; NYql::TIssues issues; UNIT_ASSERT_C(Tokenize(*lexer, query, "Query", tokens, issues, SQL_MAX_PARSER_ERRORS), diff --git a/yql/essentials/sql/v1/ut/ya.make b/yql/essentials/sql/v1/ut/ya.make index f7dddb9af18..6922d80243e 100644 --- a/yql/essentials/sql/v1/ut/ya.make +++ b/yql/essentials/sql/v1/ut/ya.make @@ -12,6 +12,7 @@ PEERDIR( yql/essentials/sql yql/essentials/sql/pg_dummy yql/essentials/sql/v1/format + yql/essentials/sql/v1/lexer/antlr3 ) TIMEOUT(300) diff --git a/yql/essentials/sql/v1/ut_antlr4/ya.make b/yql/essentials/sql/v1/ut_antlr4/ya.make index 211ebf7fe2e..427d68f825c 100644 --- a/yql/essentials/sql/v1/ut_antlr4/ya.make +++ b/yql/essentials/sql/v1/ut_antlr4/ya.make @@ -12,6 +12,7 @@ PEERDIR( yql/essentials/sql yql/essentials/sql/pg_dummy yql/essentials/sql/v1/format + yql/essentials/sql/v1/lexer/antlr4 ) TIMEOUT(300) |