diff options
author | Victor Smirnov <[email protected]> | 2025-03-19 13:03:56 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-03-19 13:18:48 +0300 |
commit | 28b29535ce7b21a3dde60b485c98f66f8c08f882 (patch) | |
tree | b831ec57225a22c3241a443eccc20af1053fc561 /yql/essentials/sql | |
parent | 6c4b9a2b45127baabf73cdcb6323f3e3e09e5440 (diff) |
YQL-19616 Implement ILexer via antlr_ast
- [x] Added `antlr_ast/antlr4` module and moved `TLexerTokensCollector4` there from `proto_ast/antlr4`.
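- [x] Moved related declarations between modules (e.g. `TTooManyErrors` and `IErrorCollector` are now referenced from `NAST` instead of `NProtoAST`).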
Ready for review.
---
Co-authored-by: vityaman [[email protected]]
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1128
commit_hash:e08785c3408ef813505bdc7511560e9536f4ab79
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- | yql/essentials/sql/v0/context.cpp | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/context.cpp | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4/lexer.cpp | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp | 39 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h | 9 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_pure/ya.make | 13 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp | 39 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h | 9 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make | 13 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.cpp | 36 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.h | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer_ut.cpp | 10 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/ut/ya.make | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/ya.make | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/proto_parser/proto_parser.cpp | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/proto_parser/proto_parser.h | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql.cpp | 4 |
17 files changed, 172 insertions, 23 deletions
diff --git a/yql/essentials/sql/v0/context.cpp b/yql/essentials/sql/v0/context.cpp index 5d461ee5f33..01f2c1e71b3 100644 --- a/yql/essentials/sql/v0/context.cpp +++ b/yql/essentials/sql/v0/context.cpp @@ -133,7 +133,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP } if (Settings.MaxErrors <= Issues.Size()) { - ythrow NProtoAST::TTooManyErrors() << "Too many issues"; + ythrow NAST::TTooManyErrors() << "Too many issues"; } } diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp index b72c673b0ec..569ae375ebc 100644 --- a/yql/essentials/sql/v1/context.cpp +++ b/yql/essentials/sql/v1/context.cpp @@ -248,7 +248,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP } if (Settings.MaxErrors <= Issues.Size()) { - ythrow NProtoAST::TTooManyErrors() << "Too many issues"; + ythrow NAST::TTooManyErrors() << "Too many issues"; } } diff --git a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp index 5add4fc6bfb..e3f63c4b65a 100644 --- a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp @@ -1,8 +1,10 @@ #include "lexer.h" + #include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> -#include <yql/essentials/public/issue/yql_issue.h> -#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> #include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> + +#include <yql/essentials/public/issue/yql_issue.h> namespace NSQLTranslationV1 { diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp new file mode 100644 index 00000000000..d1cfb228eda --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp @@ -0,0 +1,39 @@ +#include "lexer.h" + +#include <yql/essentials/parser/common/issue.h> +#include 
<yql/essentials/parser/common/antlr4/lexer_tokens_collector.h> + +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> + +#include <yql/essentials/public/issue/yql_issue.h> + +namespace NSQLTranslationV1 { + + namespace { + + class TLexer: public NSQLTranslation::ILexer { + public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NAST::TLexerTokensCollector4<NALADefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } + }; + + class TFactory: public NSQLTranslation::ILexerFactory { + public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } + }; + + } // namespace + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory() { + return MakeIntrusive<TFactory>(); + } + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h new file mode 100644 index 00000000000..21c4651daca --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h @@ -0,0 +1,9 @@ +#pragma once + +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory(); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make new file mode 100644 index 00000000000..c638733caef --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + 
yql/essentials/public/issue + yql/essentials/parser/common/antlr4 + yql/essentials/parser/antlr_ast/gen/v1_antlr4 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp new file mode 100644 index 00000000000..b1df2ac506a --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp @@ -0,0 +1,39 @@ +#include "lexer.h" + +#include <yql/essentials/parser/common/issue.h> +#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h> + +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> + +#include <yql/essentials/public/issue/yql_issue.h> + +namespace NSQLTranslationV1 { + + namespace { + + class TLexer: public NSQLTranslation::ILexer { + public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NAST::TLexerTokensCollector4<NALAAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } + }; + + class TFactory: public NSQLTranslation::ILexerFactory { + public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } + }; + + } // namespace + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory() { + return MakeIntrusive<TFactory>(); + } + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h new file mode 100644 index 00000000000..232e3fec749 --- /dev/null +++ 
b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h @@ -0,0 +1,9 @@ +#pragma once + +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory(); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make new file mode 100644 index 00000000000..161e2b77f03 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + yql/essentials/public/issue + yql/essentials/parser/common/antlr4 + yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp index 8c94fff7e1a..2b5da9ddd53 100644 --- a/yql/essentials/sql/v1/lexer/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -29,8 +29,8 @@ using NSQLTranslation::MakeDummyLexerFactory; class TV1Lexer : public ILexer { public: - explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4) - : Factory(GetFactory(lexers, ansi, antlr4)) + explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) + : Factory(GetFactory(lexers, ansi, antlr4, pure)) { } @@ -42,31 +42,41 @@ public: } private: - static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4) { - if (!ansi && !antlr4) { + static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false) { + if (!ansi && !antlr4 && !pure) { if (lexers.Antlr3) { return lexers.Antlr3; } - return MakeDummyLexerFactory("antlr3"); - } else if (ansi && !antlr4) { + } else if (ansi && !antlr4 && !pure) { if (lexers.Antlr3Ansi) { return lexers.Antlr3Ansi; } - return MakeDummyLexerFactory("antlr3_ansi"); - } else if (!ansi && antlr4) { + } else if (!ansi && antlr4 && !pure) { if (lexers.Antlr4) { return lexers.Antlr4; } - 
return MakeDummyLexerFactory("antlr4"); - } else { + } else if (ansi && antlr4 && !pure) { if (lexers.Antlr4Ansi) { return lexers.Antlr4Ansi; } - return MakeDummyLexerFactory("antlr4_ansi"); + } else if (!ansi && antlr4 && pure) { + if (lexers.Antlr4Pure) { + return lexers.Antlr4Pure; + } + return MakeDummyLexerFactory("antlr4_pure"); + } else if (ansi && antlr4 && pure) { + if (lexers.Antlr4PureAnsi) { + return lexers.Antlr4PureAnsi; + } + return MakeDummyLexerFactory("antlr4_pure_ansi"); + } else if (!ansi && !antlr4 && pure) { + return MakeDummyLexerFactory("antlr3_pure"); + } else { + return MakeDummyLexerFactory("antlr3_pure_ansi"); } } @@ -76,8 +86,8 @@ private: } // namespace -NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4) { - return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4)); +NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) { + return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4, pure)); } bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) { diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h index 2a3af96055e..857681ae51f 100644 --- a/yql/essentials/sql/v1/lexer/lexer.h +++ b/yql/essentials/sql/v1/lexer/lexer.h @@ -9,9 +9,11 @@ struct TLexers { NSQLTranslation::TLexerFactoryPtr Antlr3Ansi; NSQLTranslation::TLexerFactoryPtr Antlr4; NSQLTranslation::TLexerFactoryPtr Antlr4Ansi; + NSQLTranslation::TLexerFactoryPtr Antlr4Pure; + NSQLTranslation::TLexerFactoryPtr Antlr4PureAnsi; }; -NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4); +NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false); // "Probably" because YQL keyword can be an identifier // depending on a query context. 
For example diff --git a/yql/essentials/sql/v1/lexer/lexer_ut.cpp b/yql/essentials/sql/v1/lexer/lexer_ut.cpp index 2f0c8bb8e2b..3ad01f631b6 100644 --- a/yql/essentials/sql/v1/lexer/lexer_ut.cpp +++ b/yql/essentials/sql/v1/lexer/lexer_ut.cpp @@ -2,8 +2,10 @@ #include <yql/essentials/core/issue/yql_issue.h> #include <yql/essentials/sql/settings/translation_settings.h> + #include <yql/essentials/sql/v1/lexer/antlr3/lexer.h> #include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> #include <library/cpp/testing/unittest/registar.h> @@ -79,16 +81,21 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { NSQLTranslationV1::TLexers lexers; lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); + auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true); for (const auto& query : queriesUtf8) { auto [tokens3, issues3] = Tokenize(lexer3, query); auto [tokens4, issues4] = Tokenize(lexer4, query); + auto [tokens4p, issues4p] = Tokenize(lexer4p, query); AssertEquivialent(tokens3, tokens4); + AssertEquivialent(tokens3, tokens4p); UNIT_ASSERT(issues3.Empty()); UNIT_ASSERT(issues4.Empty()); + UNIT_ASSERT(issues4p.Empty()); } } @@ -160,13 +167,16 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); + auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true); for (const auto& query : InvalidQueries()) { auto issues3 = GetIssueMessages(lexer3, query); auto issues4 = GetIssueMessages(lexer4, query); + auto issues4p = GetIssueMessages(lexer4p, query); 
UNIT_ASSERT(!issues3.empty()); UNIT_ASSERT(!issues4.empty()); + UNIT_ASSERT(!issues4p.empty()); } } diff --git a/yql/essentials/sql/v1/lexer/ut/ya.make b/yql/essentials/sql/v1/lexer/ut/ya.make index 70503c127e8..c50c8cd7277 100644 --- a/yql/essentials/sql/v1/lexer/ut/ya.make +++ b/yql/essentials/sql/v1/lexer/ut/ya.make @@ -5,6 +5,7 @@ PEERDIR( yql/essentials/parser/lexer_common yql/essentials/sql/v1/lexer/antlr3 yql/essentials/sql/v1/lexer/antlr4 + yql/essentials/sql/v1/lexer/antlr4_pure ) SRCS( diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make index 8a3f00d36e1..c38b56c9273 100644 --- a/yql/essentials/sql/v1/lexer/ya.make +++ b/yql/essentials/sql/v1/lexer/ya.make @@ -20,6 +20,8 @@ RECURSE( antlr3_ansi antlr4 antlr4_ansi + antlr4_pure + antlr4_pure_ansi ) RECURSE_FOR_TESTS( diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp index 5651345215c..5fbef92ca52 100644 --- a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp +++ b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp @@ -27,7 +27,7 @@ namespace NSQLTranslationV1 { namespace { -void ReportError(NProtoAST::IErrorCollector& err, const TString& name) { +void ReportError(NAST::IErrorCollector& err, const TString& name) { err.Error(0, 0, TStringBuilder() << "Parser " << name << " is not supported"); } @@ -39,7 +39,7 @@ google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, return SqlAST(parsers, query, queryName, collector, ansiLexer, anlr4Parser, arena); } -google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, +google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NAST::IErrorCollector& err, bool ansiLexer, bool anlr4Parser, google::protobuf::Arena* arena) { YQL_ENSURE(arena); #if defined(_tsan_enabled_) diff --git 
a/yql/essentials/sql/v1/proto_parser/proto_parser.h b/yql/essentials/sql/v1/proto_parser/proto_parser.h index b2002d875b8..39c41771410 100644 --- a/yql/essentials/sql/v1/proto_parser/proto_parser.h +++ b/yql/essentials/sql/v1/proto_parser/proto_parser.h @@ -23,5 +23,5 @@ namespace NSQLTranslationV1 { google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena); google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, - NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena); + NAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena); } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp index 7201c4b0693..3d8951cb83e 100644 --- a/yql/essentials/sql/v1/sql.cpp +++ b/yql/essentials/sql/v1/sql.cpp @@ -20,7 +20,7 @@ TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx) if (node && node->Init(ctx, nullptr)) { return node->Translate(ctx); } - } catch (const NProtoAST::TTooManyErrors&) { + } catch (const NAST::TTooManyErrors&) { // do not add error issue, no room for it } @@ -34,7 +34,7 @@ TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core if (node && node->Init(ctx, nullptr)) { return node->Translate(ctx); } - } catch (const NProtoAST::TTooManyErrors&) { + } catch (const NAST::TTooManyErrors&) { // do not add error issue, no room for it } |