diff options
author | Victor Smirnov <[email protected]> | 2025-03-19 13:03:56 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-03-19 13:18:48 +0300 |
commit | 28b29535ce7b21a3dde60b485c98f66f8c08f882 (patch) | |
tree | b831ec57225a22c3241a443eccc20af1053fc561 | |
parent | 6c4b9a2b45127baabf73cdcb6323f3e3e09e5440 (diff) |
YQL-19616 Implement ILexer via antlr_ast
- [x] Added the `parser/common/antlr4` module and moved `TLexerTokensCollector4` (together with `YqlErrorListener`) there from `proto_ast/antlr4`.
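- [x] Moved the shared error-handling code (`IErrorCollector`, `TTooManyErrors`, `TErrorOutput`, `INVALID_TOKEN_NAME`, `ABSENCE`) from `proto_ast/common` into the new `parser/common` library under namespace `NAST`, and `TErrorCollectorOverIssues` into `parser/common/issue.h`; `proto_ast/collect_issues/collect_issues.h` is now a forwarding include, and `NProtoAST` pulls in `NAST` via `using namespace`.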
Ready for a review.
---
Co-authored-by: vityaman [[email protected]]
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1128
commit_hash:e08785c3408ef813505bdc7511560e9536f4ab79
33 files changed, 440 insertions, 206 deletions
diff --git a/yql/essentials/parser/common/antlr4/error_listener.cpp b/yql/essentials/parser/common/antlr4/error_listener.cpp new file mode 100644 index 00000000000..8dfc582e22f --- /dev/null +++ b/yql/essentials/parser/common/antlr4/error_listener.cpp @@ -0,0 +1,19 @@ +#include "error_listener.h" + +namespace antlr4 { + + YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error) + : errors(errors) + , error(error) + { + } + + void YqlErrorListener::syntaxError( + Recognizer* /*recognizer*/, Token* /*offendingSymbol*/, + size_t line, size_t charPositionInLine, + const std::string& msg, std::exception_ptr /*e*/) { + *error = true; + errors->Error(line, charPositionInLine, msg.c_str()); + } + +} // namespace antlr4 diff --git a/yql/essentials/parser/common/antlr4/error_listener.h b/yql/essentials/parser/common/antlr4/error_listener.h new file mode 100644 index 00000000000..206651fac4b --- /dev/null +++ b/yql/essentials/parser/common/antlr4/error_listener.h @@ -0,0 +1,22 @@ +#pragma once + +#include <yql/essentials/parser/common/error.h> + +#include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h> + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC YqlErrorListener: public BaseErrorListener { + NAST::IErrorCollector* errors; + bool* error; + + public: + YqlErrorListener(NAST::IErrorCollector* errors, bool* error); + + virtual void syntaxError( + Recognizer* recognizer, Token* offendingSymbol, + size_t line, size_t charPositionInLine, + const std::string& msg, std::exception_ptr e) override; + }; + +} // namespace antlr4 diff --git a/yql/essentials/parser/common/antlr4/lexer_tokens_collector.h b/yql/essentials/parser/common/antlr4/lexer_tokens_collector.h new file mode 100644 index 00000000000..2e5ef52f48e --- /dev/null +++ b/yql/essentials/parser/common/antlr4/lexer_tokens_collector.h @@ -0,0 +1,63 @@ +#pragma once + +#include "error_listener.h" + +#include <yql/essentials/parser/common/error.h> +#include 
<yql/essentials/parser/lexer_common/lexer.h> + +#include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h> + +namespace NAST { + + template <typename TLexer> + class TLexerTokensCollector4 { + public: + TLexerTokensCollector4(TStringBuf data, const TString& queryName = "query") + : QueryName(queryName) + , InputStream(std::string(data)) + , Lexer(&InputStream) + { + } + + void CollectTokens(NAST::IErrorCollector& errors, const NSQLTranslation::ILexer::TTokenCallback& onNextToken) { + try { + bool error = false; + typename antlr4::YqlErrorListener listener(&errors, &error); + Lexer.removeErrorListeners(); + Lexer.addErrorListener(&listener); + + for (;;) { + auto token = Lexer.nextToken(); + auto type = token->getType(); + const bool isEOF = type == TLexer::EOF; + NSQLTranslation::TParsedToken last; + last.Name = GetTokenName(type); + last.Content = token->getText(); + last.Line = token->getLine(); + last.LinePos = token->getCharPositionInLine(); + onNextToken(std::move(last)); + if (isEOF) { + break; + } + } + } catch (const NAST::TTooManyErrors&) { + } catch (...) 
{ + errors.Error(0, 0, CurrentExceptionMessage()); + } + } + + private: + TString GetTokenName(size_t type) const { + auto res = Lexer.getVocabulary().getSymbolicName(type); + if (res != "") { + return TString(res); + } + return TString(NAST::INVALID_TOKEN_NAME); + } + + TString QueryName; + antlr4::ANTLRInputStream InputStream; + TLexer Lexer; + }; + +} // namespace NAST diff --git a/yql/essentials/parser/common/antlr4/ya.make b/yql/essentials/parser/common/antlr4/ya.make new file mode 100644 index 00000000000..d74107fe119 --- /dev/null +++ b/yql/essentials/parser/common/antlr4/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + contrib/libs/antlr4_cpp_runtime + yql/essentials/parser/common +) + +SRCS( + error_listener.cpp +) + +END() diff --git a/yql/essentials/parser/common/error.cpp b/yql/essentials/parser/common/error.cpp new file mode 100644 index 00000000000..9954a037cb4 --- /dev/null +++ b/yql/essentials/parser/common/error.cpp @@ -0,0 +1,47 @@ +#include "error.h" + +namespace NAST { + + IErrorCollector::IErrorCollector(size_t maxErrors) + : MaxErrors(maxErrors) + , NumErrors(0) + { + } + + IErrorCollector::~IErrorCollector() + { + } + + void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) { + if (NumErrors + 1 == MaxErrors) { + AddError(0, 0, "Too many errors"); + ++NumErrors; + } + + if (NumErrors >= MaxErrors) { + ythrow TTooManyErrors() << "Too many errors"; + } + + AddError(line, col, message); + ++NumErrors; + } + + TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors) + : IErrorCollector(maxErrors) + , Err(err) + , Name(name) + { + } + + TErrorOutput::~TErrorOutput() + { + } + + void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) { + if (!Name.empty()) { + Err << "Query " << Name << ": "; + } + Err << "Line " << line << " column " << col << " error: " << message; + } + +} // namespace NAST diff --git a/yql/essentials/parser/common/error.h b/yql/essentials/parser/common/error.h new 
file mode 100644 index 00000000000..281d745a5bf --- /dev/null +++ b/yql/essentials/parser/common/error.h @@ -0,0 +1,42 @@ +#pragma once + +#include <util/generic/yexception.h> +#include <util/generic/fwd.h> + +namespace NAST { + static const char* INVALID_TOKEN_NAME = "nothing"; + static const char* ABSENCE = " absence"; + + class TTooManyErrors: public yexception { + }; + + class IErrorCollector { + public: + explicit IErrorCollector(size_t maxErrors); + virtual ~IErrorCollector(); + + // throws TTooManyErrors + void Error(ui32 line, ui32 col, const TString& message); + + private: + virtual void AddError(ui32 line, ui32 col, const TString& message) = 0; + + protected: + const size_t MaxErrors; + size_t NumErrors; + }; + + class TErrorOutput: public IErrorCollector { + public: + TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors); + virtual ~TErrorOutput(); + + private: + void AddError(ui32 line, ui32 col, const TString& message) override; + + public: + IOutputStream& Err; + TString Name; + }; + +} // namespace NAST diff --git a/yql/essentials/parser/common/issue.h b/yql/essentials/parser/common/issue.h new file mode 100644 index 00000000000..5573adb03c3 --- /dev/null +++ b/yql/essentials/parser/common/issue.h @@ -0,0 +1,30 @@ +#pragma once + +#include "error.h" + +#include <yql/essentials/public/issue/yql_issue.h> + +#include <util/generic/string.h> + +namespace NSQLTranslation { + + class TErrorCollectorOverIssues: public NAST::IErrorCollector { + public: + TErrorCollectorOverIssues(NYql::TIssues& issues, size_t maxErrors, const TString& file) + : IErrorCollector(maxErrors) + , Issues_(issues) + , File_(file) + { + } + + private: + void AddError(ui32 line, ui32 col, const TString& message) override { + Issues_.AddIssue(NYql::TPosition(col, line, File_), message); + } + + private: + NYql::TIssues& Issues_; + const TString File_; + }; + +} // namespace NSQLTranslation diff --git a/yql/essentials/parser/common/ya.make 
b/yql/essentials/parser/common/ya.make new file mode 100644 index 00000000000..06a7e91363a --- /dev/null +++ b/yql/essentials/parser/common/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +PEERDIR( + yql/essentials/public/issue +) + +SRCS( + error.cpp +) + +END() diff --git a/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h b/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h index 22f40fd1fd9..e06d7216063 100644 --- a/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h +++ b/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h @@ -5,6 +5,7 @@ #include <contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp> namespace NProtoAST { + using namespace NAST; template <typename TParser, typename TLexer> class TProtoASTBuilder3 { diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h index 81973a400a1..0539cfccf01 100644 --- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h +++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h @@ -1,5 +1,9 @@ #pragma once +#include <yql/essentials/parser/common/error.h> +#include <yql/essentials/parser/common/antlr4/error_listener.h> +#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h> + #include <yql/essentials/parser/proto_ast/common.h> #ifdef ERROR @@ -7,19 +11,15 @@ #endif #include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h> -namespace antlr4 { - class ANTLR4CPP_PUBLIC YqlErrorListener : public BaseErrorListener { - NProtoAST::IErrorCollector* errors; - bool* error; - public: - YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error); +namespace NProtoAST { + using namespace NAST; - virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, - const std::string &msg, std::exception_ptr e) override; - }; -} + template <typename InputType> + void InvalidCharacter(IOutputStream& err, const InputType* input); + + template <typename 
TokenType> + inline void InvalidToken(IOutputStream& err, const TokenType* token); -namespace NProtoAST { template <> inline void InvalidToken<antlr4::Token>(IOutputStream& err, const antlr4::Token* token) { if (token) { @@ -76,56 +76,5 @@ namespace NProtoAST { TParser Parser; }; - template <typename TLexer> - class TLexerTokensCollector4 { - - public: - TLexerTokensCollector4(TStringBuf data, const TString& queryName = "query") - : QueryName(queryName) - , InputStream(std::string(data)) - , Lexer(&InputStream) - { - } - - void CollectTokens(IErrorCollector& errors, const NSQLTranslation::ILexer::TTokenCallback& onNextToken) { - try { - bool error = false; - typename antlr4::YqlErrorListener listener(&errors, &error); - Lexer.removeErrorListeners(); - Lexer.addErrorListener(&listener); - - for (;;) { - auto token = Lexer.nextToken(); - auto type = token->getType(); - const bool isEOF = type == TLexer::EOF; - NSQLTranslation::TParsedToken last; - last.Name = GetTokenName(type); - last.Content = token->getText(); - last.Line = token->getLine(); - last.LinePos = token->getCharPositionInLine(); - onNextToken(std::move(last)); - if (isEOF) { - break; - } - } - } catch (const TTooManyErrors&) { - } catch (...) 
{ - errors.Error(0, 0, CurrentExceptionMessage()); - } - } - - private: - TString GetTokenName(size_t type) const { - auto res = Lexer.getVocabulary().getSymbolicName(type); - if (res != ""){ - return TString(res); - } - return TString(INVALID_TOKEN_NAME); - } - - TString QueryName; - antlr4::ANTLRInputStream InputStream; - TLexer Lexer; - }; } // namespace NProtoAST diff --git a/yql/essentials/parser/proto_ast/antlr4/ya.make b/yql/essentials/parser/proto_ast/antlr4/ya.make index c419174e177..18c68a249bc 100644 --- a/yql/essentials/parser/proto_ast/antlr4/ya.make +++ b/yql/essentials/parser/proto_ast/antlr4/ya.make @@ -1,6 +1,7 @@ LIBRARY() PEERDIR( + yql/essentials/parser/common/antlr4 yql/essentials/parser/proto_ast contrib/libs/antlr4_cpp_runtime ) diff --git a/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h b/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h index 8fd8be4b3d4..11ba0c86133 100644 --- a/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h +++ b/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h @@ -1,27 +1 @@ -#pragma once - -#include <yql/essentials/parser/proto_ast/common.h> -#include <yql/essentials/public/issue/yql_issue.h> - -namespace NSQLTranslation { - -class TErrorCollectorOverIssues : public NProtoAST::IErrorCollector { -public: - TErrorCollectorOverIssues(NYql::TIssues& issues, size_t maxErrors, const TString& file) - : IErrorCollector(maxErrors) - , Issues_(issues) - , File_(file) - { - } - -private: - void AddError(ui32 line, ui32 col, const TString& message) override { - Issues_.AddIssue(NYql::TPosition(col, line, File_), message); - } - -private: - NYql::TIssues& Issues_; - const TString File_; -}; - -} // namespace NSQLTranslation +#include <yql/essentials/parser/common/issue.h> diff --git a/yql/essentials/parser/proto_ast/collect_issues/ya.make b/yql/essentials/parser/proto_ast/collect_issues/ya.make index 26fac8cf892..e2e6b681dbb 100644 --- 
a/yql/essentials/parser/proto_ast/collect_issues/ya.make +++ b/yql/essentials/parser/proto_ast/collect_issues/ya.make @@ -1,12 +1,7 @@ LIBRARY() PEERDIR( - yql/essentials/public/issue - yql/essentials/parser/proto_ast -) - -SRCS( - collect_issues.h + yql/essentials/parser/common ) END() diff --git a/yql/essentials/parser/proto_ast/common.cpp b/yql/essentials/parser/proto_ast/common.cpp deleted file mode 100644 index a9d1b9d268e..00000000000 --- a/yql/essentials/parser/proto_ast/common.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "common.h" - -namespace NProtoAST { - -IErrorCollector::IErrorCollector(size_t maxErrors) - : MaxErrors(maxErrors) - , NumErrors(0) -{ -} - -IErrorCollector::~IErrorCollector() -{ -} - -void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) { - if (NumErrors + 1 == MaxErrors) { - AddError(0, 0, "Too many errors"); - ++NumErrors; - } - - if (NumErrors >= MaxErrors) { - ythrow TTooManyErrors() << "Too many errors"; - } - - AddError(line, col, message); - ++NumErrors; -} - -TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors) - : IErrorCollector(maxErrors) - , Err(err) - , Name(name) -{ -} - -TErrorOutput::~TErrorOutput() -{ -} - -void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) { - if (!Name.empty()) { - Err << "Query " << Name << ": "; - } - Err << "Line " << line << " column " << col << " error: " << message; -} - -} diff --git a/yql/essentials/parser/proto_ast/common.h b/yql/essentials/parser/proto_ast/common.h index 64017a5c449..be0b5ee0810 100644 --- a/yql/essentials/parser/proto_ast/common.h +++ b/yql/essentials/parser/proto_ast/common.h @@ -1,15 +1,14 @@ #pragma once #include <yql/essentials/parser/lexer_common/lexer.h> +#include <yql/essentials/parser/common/error.h> #include <google/protobuf/message.h> #include <util/generic/ptr.h> #include <util/generic/vector.h> #include <util/charset/utf8.h> -namespace NProtoAST { - static const char* INVALID_TOKEN_NAME = 
"nothing"; - static const char* ABSENCE = " absence"; +namespace NAST { template <typename InputType> void InvalidCharacter(IOutputStream& err, const InputType* input) { @@ -22,7 +21,6 @@ namespace NProtoAST { } } - template <typename TokenType> inline void InvalidToken(IOutputStream& err, const TokenType* token) { if (token) { @@ -34,46 +32,16 @@ namespace NProtoAST { } } - class TTooManyErrors : public yexception { - }; - - class IErrorCollector { - public: - explicit IErrorCollector(size_t maxErrors); - virtual ~IErrorCollector(); - - // throws TTooManyErrors - void Error(ui32 line, ui32 col, const TString& message); - - private: - virtual void AddError(ui32 line, ui32 col, const TString& message) = 0; - - protected: - const size_t MaxErrors; - size_t NumErrors; - }; - - class TErrorOutput: public IErrorCollector { - public: - TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors); - virtual ~TErrorOutput(); - - private: - void AddError(ui32 line, ui32 col, const TString& message) override; - - public: - IOutputStream& Err; - TString Name; - }; -} // namespace NProtoAST +} // namespace NAST namespace NSQLTranslation { + class IParser { public: virtual ~IParser() = default; virtual google::protobuf::Message* Parse( - const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, + const TString& query, const TString& queryName, NAST::IErrorCollector& err, google::protobuf::Arena* arena) = 0; }; @@ -85,4 +53,5 @@ namespace NSQLTranslation { }; using TParserFactoryPtr = TIntrusivePtr<IParserFactory>; + } // namespace NSQLTranslation diff --git a/yql/essentials/parser/proto_ast/ya.make b/yql/essentials/parser/proto_ast/ya.make index 6b7493ae893..6daae38e176 100644 --- a/yql/essentials/parser/proto_ast/ya.make +++ b/yql/essentials/parser/proto_ast/ya.make @@ -2,10 +2,7 @@ LIBRARY() PEERDIR( contrib/libs/protobuf -) - -SRCS( - common.cpp + yql/essentials/parser/common ) END() diff --git a/yql/essentials/sql/v0/context.cpp 
b/yql/essentials/sql/v0/context.cpp index 5d461ee5f33..01f2c1e71b3 100644 --- a/yql/essentials/sql/v0/context.cpp +++ b/yql/essentials/sql/v0/context.cpp @@ -133,7 +133,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP } if (Settings.MaxErrors <= Issues.Size()) { - ythrow NProtoAST::TTooManyErrors() << "Too many issues"; + ythrow NAST::TTooManyErrors() << "Too many issues"; } } diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp index b72c673b0ec..569ae375ebc 100644 --- a/yql/essentials/sql/v1/context.cpp +++ b/yql/essentials/sql/v1/context.cpp @@ -248,7 +248,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP } if (Settings.MaxErrors <= Issues.Size()) { - ythrow NProtoAST::TTooManyErrors() << "Too many issues"; + ythrow NAST::TTooManyErrors() << "Too many issues"; } } diff --git a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp index 5add4fc6bfb..e3f63c4b65a 100644 --- a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp @@ -1,8 +1,10 @@ #include "lexer.h" + #include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> -#include <yql/essentials/public/issue/yql_issue.h> -#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> #include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> + +#include <yql/essentials/public/issue/yql_issue.h> namespace NSQLTranslationV1 { diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp new file mode 100644 index 00000000000..d1cfb228eda --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp @@ -0,0 +1,39 @@ +#include "lexer.h" + +#include <yql/essentials/parser/common/issue.h> +#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h> + 
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> + +#include <yql/essentials/public/issue/yql_issue.h> + +namespace NSQLTranslationV1 { + + namespace { + + class TLexer: public NSQLTranslation::ILexer { + public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NAST::TLexerTokensCollector4<NALADefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } + }; + + class TFactory: public NSQLTranslation::ILexerFactory { + public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } + }; + + } // namespace + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory() { + return MakeIntrusive<TFactory>(); + } + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h new file mode 100644 index 00000000000..21c4651daca --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h @@ -0,0 +1,9 @@ +#pragma once + +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory(); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make new file mode 100644 index 00000000000..c638733caef --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + yql/essentials/public/issue + yql/essentials/parser/common/antlr4 + 
yql/essentials/parser/antlr_ast/gen/v1_antlr4 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp new file mode 100644 index 00000000000..b1df2ac506a --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp @@ -0,0 +1,39 @@ +#include "lexer.h" + +#include <yql/essentials/parser/common/issue.h> +#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h> + +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> + +#include <yql/essentials/public/issue/yql_issue.h> + +namespace NSQLTranslationV1 { + + namespace { + + class TLexer: public NSQLTranslation::ILexer { + public: + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final { + NYql::TIssues newIssues; + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName); + NAST::TLexerTokensCollector4<NALAAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } + }; + + class TFactory: public NSQLTranslation::ILexerFactory { + public: + THolder<NSQLTranslation::ILexer> MakeLexer() const final { + return MakeHolder<TLexer>(); + } + }; + + } // namespace + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory() { + return MakeIntrusive<TFactory>(); + } + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h new file mode 100644 index 00000000000..232e3fec749 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h @@ -0,0 +1,9 @@ +#pragma once + +#include 
<yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLTranslationV1 { + + NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory(); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make new file mode 100644 index 00000000000..161e2b77f03 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + yql/essentials/public/issue + yql/essentials/parser/common/antlr4 + yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4 +) + +SRCS( + lexer.cpp +) + +END() diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp index 8c94fff7e1a..2b5da9ddd53 100644 --- a/yql/essentials/sql/v1/lexer/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -29,8 +29,8 @@ using NSQLTranslation::MakeDummyLexerFactory; class TV1Lexer : public ILexer { public: - explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4) - : Factory(GetFactory(lexers, ansi, antlr4)) + explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) + : Factory(GetFactory(lexers, ansi, antlr4, pure)) { } @@ -42,31 +42,41 @@ public: } private: - static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4) { - if (!ansi && !antlr4) { + static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false) { + if (!ansi && !antlr4 && !pure) { if (lexers.Antlr3) { return lexers.Antlr3; } - return MakeDummyLexerFactory("antlr3"); - } else if (ansi && !antlr4) { + } else if (ansi && !antlr4 && !pure) { if (lexers.Antlr3Ansi) { return lexers.Antlr3Ansi; } - return MakeDummyLexerFactory("antlr3_ansi"); - } else if (!ansi && antlr4) { + } else if (!ansi && antlr4 && !pure) { if (lexers.Antlr4) { return lexers.Antlr4; } - return MakeDummyLexerFactory("antlr4"); - } else { + } else if (ansi && antlr4 && !pure) { if 
(lexers.Antlr4Ansi) { return lexers.Antlr4Ansi; } - return MakeDummyLexerFactory("antlr4_ansi"); + } else if (!ansi && antlr4 && pure) { + if (lexers.Antlr4Pure) { + return lexers.Antlr4Pure; + } + return MakeDummyLexerFactory("antlr4_pure"); + } else if (ansi && antlr4 && pure) { + if (lexers.Antlr4PureAnsi) { + return lexers.Antlr4PureAnsi; + } + return MakeDummyLexerFactory("antlr4_pure_ansi"); + } else if (!ansi && !antlr4 && pure) { + return MakeDummyLexerFactory("antlr3_pure"); + } else { + return MakeDummyLexerFactory("antlr3_pure_ansi"); } } @@ -76,8 +86,8 @@ private: } // namespace -NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4) { - return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4)); +NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) { + return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4, pure)); } bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) { diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h index 2a3af96055e..857681ae51f 100644 --- a/yql/essentials/sql/v1/lexer/lexer.h +++ b/yql/essentials/sql/v1/lexer/lexer.h @@ -9,9 +9,11 @@ struct TLexers { NSQLTranslation::TLexerFactoryPtr Antlr3Ansi; NSQLTranslation::TLexerFactoryPtr Antlr4; NSQLTranslation::TLexerFactoryPtr Antlr4Ansi; + NSQLTranslation::TLexerFactoryPtr Antlr4Pure; + NSQLTranslation::TLexerFactoryPtr Antlr4PureAnsi; }; -NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4); +NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false); // "Probably" because YQL keyword can be an identifier // depending on a query context. 
For example diff --git a/yql/essentials/sql/v1/lexer/lexer_ut.cpp b/yql/essentials/sql/v1/lexer/lexer_ut.cpp index 2f0c8bb8e2b..3ad01f631b6 100644 --- a/yql/essentials/sql/v1/lexer/lexer_ut.cpp +++ b/yql/essentials/sql/v1/lexer/lexer_ut.cpp @@ -2,8 +2,10 @@ #include <yql/essentials/core/issue/yql_issue.h> #include <yql/essentials/sql/settings/translation_settings.h> + #include <yql/essentials/sql/v1/lexer/antlr3/lexer.h> #include <yql/essentials/sql/v1/lexer/antlr4/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> #include <library/cpp/testing/unittest/registar.h> @@ -79,16 +81,21 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { NSQLTranslationV1::TLexers lexers; lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory(); lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory(); + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); + auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true); for (const auto& query : queriesUtf8) { auto [tokens3, issues3] = Tokenize(lexer3, query); auto [tokens4, issues4] = Tokenize(lexer4, query); + auto [tokens4p, issues4p] = Tokenize(lexer4p, query); AssertEquivialent(tokens3, tokens4); + AssertEquivialent(tokens3, tokens4p); UNIT_ASSERT(issues3.Empty()); UNIT_ASSERT(issues4.Empty()); + UNIT_ASSERT(issues4p.Empty()); } } @@ -160,13 +167,16 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) { auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false); auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true); + auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true); for (const auto& query : InvalidQueries()) { auto issues3 = GetIssueMessages(lexer3, query); auto issues4 = GetIssueMessages(lexer4, query); + auto issues4p = GetIssueMessages(lexer4p, query); 
UNIT_ASSERT(!issues3.empty()); UNIT_ASSERT(!issues4.empty()); + UNIT_ASSERT(!issues4p.empty()); } } diff --git a/yql/essentials/sql/v1/lexer/ut/ya.make b/yql/essentials/sql/v1/lexer/ut/ya.make index 70503c127e8..c50c8cd7277 100644 --- a/yql/essentials/sql/v1/lexer/ut/ya.make +++ b/yql/essentials/sql/v1/lexer/ut/ya.make @@ -5,6 +5,7 @@ PEERDIR( yql/essentials/parser/lexer_common yql/essentials/sql/v1/lexer/antlr3 yql/essentials/sql/v1/lexer/antlr4 + yql/essentials/sql/v1/lexer/antlr4_pure ) SRCS( diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make index 8a3f00d36e1..c38b56c9273 100644 --- a/yql/essentials/sql/v1/lexer/ya.make +++ b/yql/essentials/sql/v1/lexer/ya.make @@ -20,6 +20,8 @@ RECURSE( antlr3_ansi antlr4 antlr4_ansi + antlr4_pure + antlr4_pure_ansi ) RECURSE_FOR_TESTS( diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp index 5651345215c..5fbef92ca52 100644 --- a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp +++ b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp @@ -27,7 +27,7 @@ namespace NSQLTranslationV1 { namespace { -void ReportError(NProtoAST::IErrorCollector& err, const TString& name) { +void ReportError(NAST::IErrorCollector& err, const TString& name) { err.Error(0, 0, TStringBuilder() << "Parser " << name << " is not supported"); } @@ -39,7 +39,7 @@ google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, return SqlAST(parsers, query, queryName, collector, ansiLexer, anlr4Parser, arena); } -google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, +google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NAST::IErrorCollector& err, bool ansiLexer, bool anlr4Parser, google::protobuf::Arena* arena) { YQL_ENSURE(arena); #if defined(_tsan_enabled_) diff --git 
a/yql/essentials/sql/v1/proto_parser/proto_parser.h b/yql/essentials/sql/v1/proto_parser/proto_parser.h index b2002d875b8..39c41771410 100644 --- a/yql/essentials/sql/v1/proto_parser/proto_parser.h +++ b/yql/essentials/sql/v1/proto_parser/proto_parser.h @@ -23,5 +23,5 @@ namespace NSQLTranslationV1 { google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena); google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, - NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena); + NAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena); } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp index 7201c4b0693..3d8951cb83e 100644 --- a/yql/essentials/sql/v1/sql.cpp +++ b/yql/essentials/sql/v1/sql.cpp @@ -20,7 +20,7 @@ TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx) if (node && node->Init(ctx, nullptr)) { return node->Translate(ctx); } - } catch (const NProtoAST::TTooManyErrors&) { + } catch (const NAST::TTooManyErrors&) { // do not add error issue, no room for it } @@ -34,7 +34,7 @@ TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core if (node && node->Init(ctx, nullptr)) { return node->Translate(ctx); } - } catch (const NProtoAST::TTooManyErrors&) { + } catch (const NAST::TTooManyErrors&) { // do not add error issue, no room for it } |