diff options
author | vvvv <[email protected]> | 2025-02-18 14:49:48 +0300 |
---|---|---|
committer | vvvv <[email protected]> | 2025-02-18 16:08:18 +0300 |
commit | 1213d16b7fd20d4255d2ebb709a1745efbfeb91b (patch) | |
tree | a1cd11d96b5abf0a0ec287c76c4f3cfe9b32a86e /yql/essentials/sql/v1/lexer | |
parent | 408888e6801333da2d97af0b27a1c4da4448b9e0 (diff) |
Introduced lexer & parser interfaces
commit_hash:fee365c90a176dd33a967cee20994b21d530080c
Diffstat (limited to 'yql/essentials/sql/v1/lexer')
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr3/lexer.cpp | 40 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr3/lexer.h | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr3/ya.make | 14 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.cpp | 40 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr3_ansi/ya.make | 14 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4/lexer.cpp | 36 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4/lexer.h | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4/ya.make | 14 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.cpp | 36 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/antlr4_ansi/ya.make | 14 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.cpp | 96 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.h | 13 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/ya.make | 17 |
15 files changed, 325 insertions, 41 deletions
diff --git a/yql/essentials/sql/v1/lexer/antlr3/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr3/lexer.cpp new file mode 100644 index 00000000000..e2d23d1503d --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr3/lexer.cpp @@ -0,0 +1,40 @@ +#include "lexer.h" +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h> + +namespace NALPDefault { +extern ANTLR_UINT8 *SQLv1ParserTokenNames[]; +} + +namespace NSQLTranslationV1 { + +namespace { + +class TLexer: public NSQLTranslation::ILexer { +public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NProtoAST::TLexerTokensCollector3<NALPDefault::SQLv1Lexer> tokensCollector(query, (const char**)NALPDefault::SQLv1ParserTokenNames, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } +}; + +class TFactory: public NSQLTranslation::ILexerFactory { +public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } +}; + +} + +NSQLTranslation::TLexerFactoryPtr MakeAntlr3LexerFactory() { + return MakeIntrusive<TFactory>(); +} + +} diff --git a/yql/essentials/sql/v1/lexer/antlr3/lexer.h b/yql/essentials/sql/v1/lexer/antlr3/lexer.h new file mode 100644 index 00000000000..0c4fd35d0f6 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr3/lexer.h @@ -0,0 +1,8 @@ +#pragma once +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + +NSQLTranslation::TLexerFactoryPtr MakeAntlr3LexerFactory(); + +} diff --git a/yql/essentials/sql/v1/lexer/antlr3/ya.make b/yql/essentials/sql/v1/lexer/antlr3/ya.make new file mode 100644 index 00000000000..8fd1537311c --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr3/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + yql/essentials/parser/lexer_common + yql/essentials/public/issue + yql/essentials/parser/proto_ast/collect_issues + yql/essentials/parser/proto_ast/gen/v1 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.cpp new file mode 100644 index 00000000000..5b5c459f701 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.cpp @@ -0,0 +1,40 @@ +#include "lexer.h" +#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h> +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h> + +namespace NALPAnsi { +extern ANTLR_UINT8 *SQLv1ParserTokenNames[]; +} + +namespace NSQLTranslationV1 { + +namespace { + +class TLexer: public NSQLTranslation::ILexer { +public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NProtoAST::TLexerTokensCollector3<NALPAnsi::SQLv1Lexer> tokensCollector(query, (const char**)NALPAnsi::SQLv1ParserTokenNames, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } +}; + +class TFactory: public NSQLTranslation::ILexerFactory { +public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } +}; + +} + +NSQLTranslation::TLexerFactoryPtr MakeAntlr3AnsiLexerFactory() { + return MakeIntrusive<TFactory>(); +} + +} diff --git a/yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h b/yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h new file mode 100644 index 00000000000..c2347d82332 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h @@ -0,0 +1,8 @@ +#pragma once +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + +NSQLTranslation::TLexerFactoryPtr MakeAntlr3AnsiLexerFactory(); + +} diff --git a/yql/essentials/sql/v1/lexer/antlr3_ansi/ya.make b/yql/essentials/sql/v1/lexer/antlr3_ansi/ya.make new file mode 100644 index 00000000000..ed34a4fc28c --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr3_ansi/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + yql/essentials/parser/lexer_common + yql/essentials/public/issue + yql/essentials/parser/proto_ast/collect_issues + yql/essentials/parser/proto_ast/gen/v1_ansi +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp new file mode 100644 index 00000000000..5add4fc6bfb --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp @@ -0,0 +1,36 @@ +#include "lexer.h" +#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> + +namespace NSQLTranslationV1 { + +namespace { + +class TLexer: public NSQLTranslation::ILexer { +public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NProtoAST::TLexerTokensCollector4<NALPDefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } +}; + +class TFactory: public NSQLTranslation::ILexerFactory { +public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } +}; + +} + +NSQLTranslation::TLexerFactoryPtr MakeAntlr4LexerFactory() { + return MakeIntrusive<TFactory>(); +} + +} diff --git a/yql/essentials/sql/v1/lexer/antlr4/lexer.h b/yql/essentials/sql/v1/lexer/antlr4/lexer.h new file mode 100644 index 00000000000..89d598d018e --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4/lexer.h @@ -0,0 +1,8 @@ +#pragma once +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + +NSQLTranslation::TLexerFactoryPtr MakeAntlr4LexerFactory(); + +} diff --git a/yql/essentials/sql/v1/lexer/antlr4/ya.make b/yql/essentials/sql/v1/lexer/antlr4/ya.make new file mode 100644 index 00000000000..5b97daed5c6 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + yql/essentials/parser/lexer_common + yql/essentials/public/issue + yql/essentials/parser/proto_ast/collect_issues + yql/essentials/parser/proto_ast/gen/v1_antlr4 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.cpp new file mode 100644 index 00000000000..ca5842ae984 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.cpp @@ -0,0 +1,36 @@ +#include "lexer.h" +#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> + +namespace NSQLTranslationV1 { + +namespace { + +class TLexer: public NSQLTranslation::ILexer { +public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NProtoAST::TLexerTokensCollector4<NALPAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } +}; + +class TFactory: public NSQLTranslation::ILexerFactory { +public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } +}; + +} + +NSQLTranslation::TLexerFactoryPtr MakeAntlr4AnsiLexerFactory() { + return MakeIntrusive<TFactory>(); +} + +} diff --git a/yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h new file mode 100644 index 00000000000..4aed14ef017 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h @@ -0,0 +1,8 @@ +#pragma once +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + +NSQLTranslation::TLexerFactoryPtr MakeAntlr4AnsiLexerFactory(); + +} diff --git a/yql/essentials/sql/v1/lexer/antlr4_ansi/ya.make b/yql/essentials/sql/v1/lexer/antlr4_ansi/ya.make new file mode 100644 index 00000000000..292094bcd13 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_ansi/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + yql/essentials/parser/lexer_common + yql/essentials/public/issue + yql/essentials/parser/proto_ast/collect_issues + yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp index fb9ca54f358..d477580b042 100644 --- a/yql/essentials/sql/v1/lexer/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -1,14 +1,12 @@ #include "lexer.h" #include <yql/essentials/public/issue/yql_issue.h> -#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> -#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h> -#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> -#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> -#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h> -#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> -#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> -#include <yql/essentials/sql/v1/sql.h> +#include <yql/essentials/parser/lexer_common/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h> +#include <yql/essentials/sql/settings/translation_settings.h> #include <util/string/ascii.h> #include <util/string/builder.h> @@ -18,17 +16,17 @@ #include <util/system/mutex.h> #endif -namespace NALPDefault { -extern ANTLR_UINT8 *SQLv1ParserTokenNames[]; -} +namespace NSQLTranslationV1 { -namespace NALPAnsi { -extern ANTLR_UINT8 *SQLv1ParserTokenNames[]; +TLexers MakeAllLexers() { + return TLexers { + .Antlr3 = MakeAntlr3LexerFactory(), + .Antlr3Ansi = MakeAntlr3AnsiLexerFactory(), + .Antlr4 = MakeAntlr4LexerFactory(), + .Antlr4Ansi = MakeAntlr4AnsiLexerFactory() + }; } - -namespace NSQLTranslationV1 { - namespace { #if defined(_tsan_enabled_) @@ -36,47 +34,71 @@ TMutex SanitizerSQLTranslationMutex; #endif using NSQLTranslation::ILexer; +using NSQLTranslation::MakeDummyLexerFactory; class TV1Lexer : public ILexer { public: - explicit TV1Lexer(bool ansi, bool antlr4) - : Ansi(ansi), Antlr4(antlr4) + explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4) + : Factory(GetFactory(lexers, ansi, antlr4)) { } bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) override { - NYql::TIssues newIssues; #if defined(_tsan_enabled_) TGuard<TMutex> grd(SanitizerSQLTranslationMutex); #endif - NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); - if (Ansi && !Antlr4) { - NProtoAST::TLexerTokensCollector3<NALPAnsi::SQLv1Lexer> tokensCollector(query, (const char**)NALPAnsi::SQLv1ParserTokenNames, queryName); - tokensCollector.CollectTokens(collector, onNextToken); - } else if (!Ansi && !Antlr4) { - NProtoAST::TLexerTokensCollector3<NALPDefault::SQLv1Lexer> tokensCollector(query, (const char**)NALPDefault::SQLv1ParserTokenNames, queryName); - tokensCollector.CollectTokens(collector, onNextToken); - } else if (Ansi && Antlr4) { - NProtoAST::TLexerTokensCollector4<NALPAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); - tokensCollector.CollectTokens(collector, onNextToken); + return Factory->MakeLexer()->Tokenize(query, queryName, onNextToken, issues, maxErrors); + } + +private: + static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4) { + if (!ansi && !antlr4) { + if (lexers.Antlr3) { + return lexers.Antlr3; + } + + if (lexers.Antlr4) { + return lexers.Antlr4; + } + + return MakeDummyLexerFactory("antlr3"); + } else if (ansi && !antlr4) { + if (lexers.Antlr3Ansi) { + return lexers.Antlr3Ansi; + } + + if (lexers.Antlr4Ansi) { + return lexers.Antlr4Ansi; + } + + return MakeDummyLexerFactory("antlr3_ansi"); + } else if (!ansi && antlr4) { + if (lexers.Antlr4) { + return lexers.Antlr4; + } + + return MakeDummyLexerFactory("antlr4"); } else { - NProtoAST::TLexerTokensCollector4<NALPDefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); - tokensCollector.CollectTokens(collector, onNextToken); - } + if (lexers.Antlr4Ansi) { + return lexers.Antlr4Ansi; + } - issues.AddIssues(newIssues); - return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + return MakeDummyLexerFactory("antlr4_ansi"); + } } private: - const bool Ansi; - const bool Antlr4; + NSQLTranslation::TLexerFactoryPtr Factory; }; } // namespace NSQLTranslation::ILexer::TPtr MakeLexer(bool ansi, bool antlr4) { - return NSQLTranslation::ILexer::TPtr(new TV1Lexer(ansi, antlr4)); + return NSQLTranslation::ILexer::TPtr(new TV1Lexer(MakeAllLexers(), ansi, antlr4)); +} + +NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4) { + return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4)); } bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) { diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h index 3ce780686c6..0e94cb10bfa 100644 --- a/yql/essentials/sql/v1/lexer/lexer.h +++ b/yql/essentials/sql/v1/lexer/lexer.h @@ -4,8 +4,21 @@ namespace NSQLTranslationV1 { +struct TLexers { + NSQLTranslation::TLexerFactoryPtr Antlr3; + NSQLTranslation::TLexerFactoryPtr Antlr3Ansi; + NSQLTranslation::TLexerFactoryPtr Antlr4; + NSQLTranslation::TLexerFactoryPtr Antlr4Ansi; +}; + +//FIXME remove +TLexers MakeAllLexers(); + +//FIXME remove NSQLTranslation::ILexer::TPtr MakeLexer(bool ansi, bool antlr4); +NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4); + // "Probably" because YQL keyword can be an identifier // depending on a query context. For example // in SELECT * FROM group - group is an identifier, but diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make index cd5eea1b700..f32e79a472c 100644 --- a/yql/essentials/sql/v1/lexer/ya.make +++ b/yql/essentials/sql/v1/lexer/ya.make @@ -2,10 +2,11 @@ LIBRARY() PEERDIR( yql/essentials/core/issue/protos - yql/essentials/parser/proto_ast/gen/v1 - yql/essentials/parser/proto_ast/gen/v1_ansi - yql/essentials/parser/proto_ast/gen/v1_antlr4 - yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4 + yql/essentials/sql/settings + yql/essentials/sql/v1/lexer/antlr3 + yql/essentials/sql/v1/lexer/antlr3_ansi + yql/essentials/sql/v1/lexer/antlr4 + yql/essentials/sql/v1/lexer/antlr4_ansi ) SRCS( @@ -18,6 +19,14 @@ SUPPRESSIONS( END() +RECURSE( + antlr3 + antlr3_ansi + antlr4 + antlr4_ansi +) + RECURSE_FOR_TESTS( ut ) + |