summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Smirnov <[email protected]>2025-03-19 13:03:56 +0300
committerrobot-piglet <[email protected]>2025-03-19 13:18:48 +0300
commit28b29535ce7b21a3dde60b485c98f66f8c08f882 (patch)
treeb831ec57225a22c3241a443eccc20af1053fc561
parent6c4b9a2b45127baabf73cdcb6323f3e3e09e5440 (diff)
YQL-19616 Implement ILexer via antlr_ast
- [x] Added `antlr_ast/antlr4` module and moved `TLexerTokensCollector4` there from `proto_ast/antlr4`. - [x] Moved stuff around back and forth. Ready for a review. --- Co-authored-by: vityaman [[email protected]] Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1128 commit_hash:e08785c3408ef813505bdc7511560e9536f4ab79
-rw-r--r--yql/essentials/parser/common/antlr4/error_listener.cpp19
-rw-r--r--yql/essentials/parser/common/antlr4/error_listener.h22
-rw-r--r--yql/essentials/parser/common/antlr4/lexer_tokens_collector.h63
-rw-r--r--yql/essentials/parser/common/antlr4/ya.make12
-rw-r--r--yql/essentials/parser/common/error.cpp47
-rw-r--r--yql/essentials/parser/common/error.h42
-rw-r--r--yql/essentials/parser/common/issue.h30
-rw-r--r--yql/essentials/parser/common/ya.make11
-rw-r--r--yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h1
-rw-r--r--yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h73
-rw-r--r--yql/essentials/parser/proto_ast/antlr4/ya.make1
-rw-r--r--yql/essentials/parser/proto_ast/collect_issues/collect_issues.h28
-rw-r--r--yql/essentials/parser/proto_ast/collect_issues/ya.make7
-rw-r--r--yql/essentials/parser/proto_ast/common.cpp47
-rw-r--r--yql/essentials/parser/proto_ast/common.h43
-rw-r--r--yql/essentials/parser/proto_ast/ya.make5
-rw-r--r--yql/essentials/sql/v0/context.cpp2
-rw-r--r--yql/essentials/sql/v1/context.cpp2
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4/lexer.cpp6
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp39
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h9
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure/ya.make13
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp39
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h9
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make13
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.cpp36
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.h4
-rw-r--r--yql/essentials/sql/v1/lexer/lexer_ut.cpp10
-rw-r--r--yql/essentials/sql/v1/lexer/ut/ya.make1
-rw-r--r--yql/essentials/sql/v1/lexer/ya.make2
-rw-r--r--yql/essentials/sql/v1/proto_parser/proto_parser.cpp4
-rw-r--r--yql/essentials/sql/v1/proto_parser/proto_parser.h2
-rw-r--r--yql/essentials/sql/v1/sql.cpp4
33 files changed, 440 insertions, 206 deletions
diff --git a/yql/essentials/parser/common/antlr4/error_listener.cpp b/yql/essentials/parser/common/antlr4/error_listener.cpp
new file mode 100644
index 00000000000..8dfc582e22f
--- /dev/null
+++ b/yql/essentials/parser/common/antlr4/error_listener.cpp
@@ -0,0 +1,19 @@
+#include "error_listener.h"
+
+namespace antlr4 {
+
+    // Binds the listener to the collector that receives the errors and to
+    // the caller-owned flag that is raised on every syntax error.
+    // Both pointers are stored as-is and are not owned.
+    YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error)
+        : errors{errors}
+        , error{error}
+    {
+    }
+
+    // ANTLR callback for a syntax error: raises the flag first, then
+    // forwards the position and message to the collector. The recognizer,
+    // offending symbol and exception pointer are ignored.
+    // IErrorCollector::Error may throw NAST::TTooManyErrors; it is not
+    // caught here and propagates to whoever drives the recognizer.
+    void YqlErrorListener::syntaxError(
+        Recognizer*, Token*,
+        size_t line, size_t charPositionInLine,
+        const std::string& msg, std::exception_ptr) {
+        *error = true;
+        const char* const text = msg.c_str();
+        errors->Error(line, charPositionInLine, text);
+    }
+
+} // namespace antlr4
diff --git a/yql/essentials/parser/common/antlr4/error_listener.h b/yql/essentials/parser/common/antlr4/error_listener.h
new file mode 100644
index 00000000000..206651fac4b
--- /dev/null
+++ b/yql/essentials/parser/common/antlr4/error_listener.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <yql/essentials/parser/common/error.h>
+
+#include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h>
+
+namespace antlr4 {
+
+ class ANTLR4CPP_PUBLIC YqlErrorListener: public BaseErrorListener {
+ NAST::IErrorCollector* errors;
+ bool* error;
+
+ public:
+ YqlErrorListener(NAST::IErrorCollector* errors, bool* error);
+
+ virtual void syntaxError(
+ Recognizer* recognizer, Token* offendingSymbol,
+ size_t line, size_t charPositionInLine,
+ const std::string& msg, std::exception_ptr e) override;
+ };
+
+} // namespace antlr4
diff --git a/yql/essentials/parser/common/antlr4/lexer_tokens_collector.h b/yql/essentials/parser/common/antlr4/lexer_tokens_collector.h
new file mode 100644
index 00000000000..2e5ef52f48e
--- /dev/null
+++ b/yql/essentials/parser/common/antlr4/lexer_tokens_collector.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "error_listener.h"
+
+#include <yql/essentials/parser/common/error.h>
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+#include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h>
+
+namespace NAST {
+
+ // Runs an ANTLR4 lexer of type TLexer over a query text and hands every
+ // produced token, the final EOF token included, to a callback.
+ template <typename TLexer>
+ class TLexerTokensCollector4 {
+ public:
+     // A std::string copy of `data` is handed to the ANTLR input stream
+     // that feeds the lexer. `queryName` is only stored.
+     TLexerTokensCollector4(TStringBuf data, const TString& queryName = "query")
+         : QueryName(queryName)
+         , InputStream(std::string(data))
+         , Lexer(&InputStream)
+     {
+     }
+
+     // Tokenizes the whole input.
+     //   errors      - receives lexer syntax errors and, at position 0:0,
+     //                 the message of any other exception raised meanwhile.
+     //   onNextToken - invoked once per token, EOF included.
+     // NAST::TTooManyErrors never escapes this method: once the collector
+     // refuses further errors, tokenization just stops.
+     void CollectTokens(NAST::IErrorCollector& errors, const NSQLTranslation::ILexer::TTokenCallback& onNextToken) {
+         bool error = false;
+         antlr4::YqlErrorListener listener(&errors, &error);
+         try {
+             Lexer.removeErrorListeners();
+             Lexer.addErrorListener(&listener);
+
+             for (;;) {
+                 auto token = Lexer.nextToken();
+                 const auto type = token->getType();
+
+                 NSQLTranslation::TParsedToken parsed;
+                 parsed.Name = GetTokenName(type);
+                 parsed.Content = token->getText();
+                 parsed.Line = token->getLine();
+                 parsed.LinePos = token->getCharPositionInLine();
+                 onNextToken(std::move(parsed));
+
+                 // EOF is delivered to the callback before the loop ends.
+                 if (type == TLexer::EOF) {
+                     break;
+                 }
+             }
+         } catch (const NAST::TTooManyErrors&) {
+             // Error budget exhausted: stop quietly.
+         } catch (...) {
+             // Reporting may itself hit the error limit; that must not
+             // turn into an exception thrown out of the handler.
+             try {
+                 errors.Error(0, 0, CurrentExceptionMessage());
+             } catch (const NAST::TTooManyErrors&) {
+                 // No room left for this report.
+             }
+         }
+
+         // `listener` lives on this stack frame; detach it so that Lexer
+         // does not keep a dangling pointer after this call returns.
+         Lexer.removeErrorListeners();
+     }
+
+ private:
+     // Symbolic name of the token type, or NAST::INVALID_TOKEN_NAME when
+     // the lexer vocabulary has no name for it.
+     TString GetTokenName(size_t type) const {
+         auto res = Lexer.getVocabulary().getSymbolicName(type);
+         if (!res.empty()) {
+             return TString(res);
+         }
+         return TString(NAST::INVALID_TOKEN_NAME);
+     }
+
+     TString QueryName;
+     antlr4::ANTLRInputStream InputStream;
+     TLexer Lexer;
+ };
+
+} // namespace NAST
diff --git a/yql/essentials/parser/common/antlr4/ya.make b/yql/essentials/parser/common/antlr4/ya.make
new file mode 100644
index 00000000000..d74107fe119
--- /dev/null
+++ b/yql/essentials/parser/common/antlr4/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+PEERDIR(
+ contrib/libs/antlr4_cpp_runtime
+ yql/essentials/parser/common
+)
+
+SRCS(
+ error_listener.cpp
+)
+
+END()
diff --git a/yql/essentials/parser/common/error.cpp b/yql/essentials/parser/common/error.cpp
new file mode 100644
index 00000000000..9954a037cb4
--- /dev/null
+++ b/yql/essentials/parser/common/error.cpp
@@ -0,0 +1,47 @@
+#include "error.h"
+
+namespace NAST {
+
+    // Starts with zero recorded errors and a fixed limit of `maxErrors`.
+    IErrorCollector::IErrorCollector(size_t maxErrors)
+        : MaxErrors{maxErrors}
+        , NumErrors{0}
+    {
+    }
+
+    IErrorCollector::~IErrorCollector() = default;
+
+    // Records one error, enforcing the limit:
+    //  - when this call would fill the last slot, a "Too many errors"
+    //    entry at 0:0 is recorded in that slot instead;
+    //  - once the limit is reached, TTooManyErrors is thrown and
+    //    `message` is not recorded.
+    void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) {
+        const bool fillsLastSlot = (MaxErrors == NumErrors + 1);
+        if (fillsLastSlot) {
+            AddError(0, 0, "Too many errors");
+            NumErrors += 1;
+        }
+
+        if (MaxErrors <= NumErrors) {
+            ythrow TTooManyErrors() << "Too many errors";
+        }
+
+        AddError(line, col, message);
+        NumErrors += 1;
+    }
+
+    // Keeps a reference to the output stream and a copy of the query name.
+    TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors)
+        : IErrorCollector(maxErrors)
+        , Err(err)
+        , Name(name)
+    {
+    }
+
+    TErrorOutput::~TErrorOutput() = default;
+
+    // Writes the error to the stream, prefixed with the query name when
+    // one is set. No line terminator is written.
+    void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) {
+        const bool hasName = !Name.empty();
+        if (hasName) {
+            Err << "Query " << Name << ": ";
+        }
+        Err << "Line " << line;
+        Err << " column " << col;
+        Err << " error: " << message;
+    }
+
+} // namespace NAST
diff --git a/yql/essentials/parser/common/error.h b/yql/essentials/parser/common/error.h
new file mode 100644
index 00000000000..281d745a5bf
--- /dev/null
+++ b/yql/essentials/parser/common/error.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+#include <util/generic/fwd.h>
+
+namespace NAST {
+    // `inline constexpr` gives one immutable definition shared by every
+    // translation unit. The previous `static const char*` form created a
+    // separate, writable pointer per translation unit.
+
+    // Name reported for a token type that has no symbolic name.
+    inline constexpr const char* INVALID_TOKEN_NAME = "nothing";
+    // NOTE(review): presumably appended to messages about a missing
+    // token; confirm at the call sites, which are not visible here.
+    inline constexpr const char* ABSENCE = " absence";
+
+    // Thrown by IErrorCollector::Error once the error limit is reached.
+    class TTooManyErrors: public yexception {
+    };
+
+    // Base class for error sinks with an upper bound on the number of
+    // recorded errors. Subclasses decide where an error goes by
+    // implementing AddError.
+    class IErrorCollector {
+    public:
+        // `maxErrors` is the total number of entries that may be recorded.
+        explicit IErrorCollector(size_t maxErrors);
+        virtual ~IErrorCollector();
+
+        // Records an error at the given position.
+        // throws TTooManyErrors when the limit has been reached; the
+        // message of that call is then not recorded.
+        void Error(ui32 line, ui32 col, const TString& message);
+
+    private:
+        // Stores or emits a single error; called by Error.
+        virtual void AddError(ui32 line, ui32 col, const TString& message) = 0;
+
+    protected:
+        const size_t MaxErrors;
+        size_t NumErrors;
+    };
+
+    // Error collector that writes every error to an output stream.
+    class TErrorOutput: public IErrorCollector {
+    public:
+        // `err` is kept by reference; `name` is copied.
+        TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors);
+        ~TErrorOutput() override;
+
+    private:
+        void AddError(ui32 line, ui32 col, const TString& message) override;
+
+    public:
+        IOutputStream& Err;
+        TString Name;
+    };
+
+} // namespace NAST
diff --git a/yql/essentials/parser/common/issue.h b/yql/essentials/parser/common/issue.h
new file mode 100644
index 00000000000..5573adb03c3
--- /dev/null
+++ b/yql/essentials/parser/common/issue.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "error.h"
+
+#include <yql/essentials/public/issue/yql_issue.h>
+
+#include <util/generic/string.h>
+
+namespace NSQLTranslation {
+
+    // Error collector that stores every reported error as an issue in a
+    // caller-owned NYql::TIssues list, positioned in file `file`.
+    class TErrorCollectorOverIssues: public NAST::IErrorCollector {
+    public:
+        // `issues` is kept by reference; `file` is copied.
+        TErrorCollectorOverIssues(NYql::TIssues& issues, size_t maxErrors, const TString& file)
+            : IErrorCollector(maxErrors)
+            , Issues_(issues)
+            , File_(file)
+        {
+        }
+
+    private:
+        // Appends one issue. The position is built as (col, line, file).
+        void AddError(ui32 line, ui32 col, const TString& message) override {
+            const NYql::TPosition where(col, line, File_);
+            Issues_.AddIssue(where, message);
+        }
+
+        NYql::TIssues& Issues_;
+        const TString File_;
+    };
+
+} // namespace NSQLTranslation
diff --git a/yql/essentials/parser/common/ya.make b/yql/essentials/parser/common/ya.make
new file mode 100644
index 00000000000..06a7e91363a
--- /dev/null
+++ b/yql/essentials/parser/common/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/public/issue
+)
+
+SRCS(
+ error.cpp
+)
+
+END()
diff --git a/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h b/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h
index 22f40fd1fd9..e06d7216063 100644
--- a/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h
+++ b/yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h
@@ -5,6 +5,7 @@
#include <contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp>
namespace NProtoAST {
+ using namespace NAST;
template <typename TParser, typename TLexer>
class TProtoASTBuilder3 {
diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h
index 81973a400a1..0539cfccf01 100644
--- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h
+++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h
@@ -1,5 +1,9 @@
#pragma once
+#include <yql/essentials/parser/common/error.h>
+#include <yql/essentials/parser/common/antlr4/error_listener.h>
+#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h>
+
#include <yql/essentials/parser/proto_ast/common.h>
#ifdef ERROR
@@ -7,19 +11,15 @@
#endif
#include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h>
-namespace antlr4 {
- class ANTLR4CPP_PUBLIC YqlErrorListener : public BaseErrorListener {
- NProtoAST::IErrorCollector* errors;
- bool* error;
- public:
- YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error);
+namespace NProtoAST {
+ using namespace NAST;
- virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine,
- const std::string &msg, std::exception_ptr e) override;
- };
-}
+ template <typename InputType>
+ void InvalidCharacter(IOutputStream& err, const InputType* input);
+
+ template <typename TokenType>
+ inline void InvalidToken(IOutputStream& err, const TokenType* token);
-namespace NProtoAST {
template <>
inline void InvalidToken<antlr4::Token>(IOutputStream& err, const antlr4::Token* token) {
if (token) {
@@ -76,56 +76,5 @@ namespace NProtoAST {
TParser Parser;
};
- template <typename TLexer>
- class TLexerTokensCollector4 {
-
- public:
- TLexerTokensCollector4(TStringBuf data, const TString& queryName = "query")
- : QueryName(queryName)
- , InputStream(std::string(data))
- , Lexer(&InputStream)
- {
- }
-
- void CollectTokens(IErrorCollector& errors, const NSQLTranslation::ILexer::TTokenCallback& onNextToken) {
- try {
- bool error = false;
- typename antlr4::YqlErrorListener listener(&errors, &error);
- Lexer.removeErrorListeners();
- Lexer.addErrorListener(&listener);
-
- for (;;) {
- auto token = Lexer.nextToken();
- auto type = token->getType();
- const bool isEOF = type == TLexer::EOF;
- NSQLTranslation::TParsedToken last;
- last.Name = GetTokenName(type);
- last.Content = token->getText();
- last.Line = token->getLine();
- last.LinePos = token->getCharPositionInLine();
- onNextToken(std::move(last));
- if (isEOF) {
- break;
- }
- }
- } catch (const TTooManyErrors&) {
- } catch (...) {
- errors.Error(0, 0, CurrentExceptionMessage());
- }
- }
-
- private:
- TString GetTokenName(size_t type) const {
- auto res = Lexer.getVocabulary().getSymbolicName(type);
- if (res != ""){
- return TString(res);
- }
- return TString(INVALID_TOKEN_NAME);
- }
-
- TString QueryName;
- antlr4::ANTLRInputStream InputStream;
- TLexer Lexer;
- };
} // namespace NProtoAST
diff --git a/yql/essentials/parser/proto_ast/antlr4/ya.make b/yql/essentials/parser/proto_ast/antlr4/ya.make
index c419174e177..18c68a249bc 100644
--- a/yql/essentials/parser/proto_ast/antlr4/ya.make
+++ b/yql/essentials/parser/proto_ast/antlr4/ya.make
@@ -1,6 +1,7 @@
LIBRARY()
PEERDIR(
+ yql/essentials/parser/common/antlr4
yql/essentials/parser/proto_ast
contrib/libs/antlr4_cpp_runtime
)
diff --git a/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h b/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h
index 8fd8be4b3d4..11ba0c86133 100644
--- a/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h
+++ b/yql/essentials/parser/proto_ast/collect_issues/collect_issues.h
@@ -1,27 +1 @@
-#pragma once
-
-#include <yql/essentials/parser/proto_ast/common.h>
-#include <yql/essentials/public/issue/yql_issue.h>
-
-namespace NSQLTranslation {
-
-class TErrorCollectorOverIssues : public NProtoAST::IErrorCollector {
-public:
- TErrorCollectorOverIssues(NYql::TIssues& issues, size_t maxErrors, const TString& file)
- : IErrorCollector(maxErrors)
- , Issues_(issues)
- , File_(file)
- {
- }
-
-private:
- void AddError(ui32 line, ui32 col, const TString& message) override {
- Issues_.AddIssue(NYql::TPosition(col, line, File_), message);
- }
-
-private:
- NYql::TIssues& Issues_;
- const TString File_;
-};
-
-} // namespace NSQLTranslation
+#include <yql/essentials/parser/common/issue.h>
diff --git a/yql/essentials/parser/proto_ast/collect_issues/ya.make b/yql/essentials/parser/proto_ast/collect_issues/ya.make
index 26fac8cf892..e2e6b681dbb 100644
--- a/yql/essentials/parser/proto_ast/collect_issues/ya.make
+++ b/yql/essentials/parser/proto_ast/collect_issues/ya.make
@@ -1,12 +1,7 @@
LIBRARY()
PEERDIR(
- yql/essentials/public/issue
- yql/essentials/parser/proto_ast
-)
-
-SRCS(
- collect_issues.h
+ yql/essentials/parser/common
)
END()
diff --git a/yql/essentials/parser/proto_ast/common.cpp b/yql/essentials/parser/proto_ast/common.cpp
deleted file mode 100644
index a9d1b9d268e..00000000000
--- a/yql/essentials/parser/proto_ast/common.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "common.h"
-
-namespace NProtoAST {
-
-IErrorCollector::IErrorCollector(size_t maxErrors)
- : MaxErrors(maxErrors)
- , NumErrors(0)
-{
-}
-
-IErrorCollector::~IErrorCollector()
-{
-}
-
-void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) {
- if (NumErrors + 1 == MaxErrors) {
- AddError(0, 0, "Too many errors");
- ++NumErrors;
- }
-
- if (NumErrors >= MaxErrors) {
- ythrow TTooManyErrors() << "Too many errors";
- }
-
- AddError(line, col, message);
- ++NumErrors;
-}
-
-TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors)
- : IErrorCollector(maxErrors)
- , Err(err)
- , Name(name)
-{
-}
-
-TErrorOutput::~TErrorOutput()
-{
-}
-
-void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) {
- if (!Name.empty()) {
- Err << "Query " << Name << ": ";
- }
- Err << "Line " << line << " column " << col << " error: " << message;
-}
-
-}
diff --git a/yql/essentials/parser/proto_ast/common.h b/yql/essentials/parser/proto_ast/common.h
index 64017a5c449..be0b5ee0810 100644
--- a/yql/essentials/parser/proto_ast/common.h
+++ b/yql/essentials/parser/proto_ast/common.h
@@ -1,15 +1,14 @@
#pragma once
#include <yql/essentials/parser/lexer_common/lexer.h>
+#include <yql/essentials/parser/common/error.h>
#include <google/protobuf/message.h>
#include <util/generic/ptr.h>
#include <util/generic/vector.h>
#include <util/charset/utf8.h>
-namespace NProtoAST {
- static const char* INVALID_TOKEN_NAME = "nothing";
- static const char* ABSENCE = " absence";
+namespace NAST {
template <typename InputType>
void InvalidCharacter(IOutputStream& err, const InputType* input) {
@@ -22,7 +21,6 @@ namespace NProtoAST {
}
}
-
template <typename TokenType>
inline void InvalidToken(IOutputStream& err, const TokenType* token) {
if (token) {
@@ -34,46 +32,16 @@ namespace NProtoAST {
}
}
- class TTooManyErrors : public yexception {
- };
-
- class IErrorCollector {
- public:
- explicit IErrorCollector(size_t maxErrors);
- virtual ~IErrorCollector();
-
- // throws TTooManyErrors
- void Error(ui32 line, ui32 col, const TString& message);
-
- private:
- virtual void AddError(ui32 line, ui32 col, const TString& message) = 0;
-
- protected:
- const size_t MaxErrors;
- size_t NumErrors;
- };
-
- class TErrorOutput: public IErrorCollector {
- public:
- TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors);
- virtual ~TErrorOutput();
-
- private:
- void AddError(ui32 line, ui32 col, const TString& message) override;
-
- public:
- IOutputStream& Err;
- TString Name;
- };
-} // namespace NProtoAST
+} // namespace NAST
namespace NSQLTranslation {
+
class IParser {
public:
virtual ~IParser() = default;
virtual google::protobuf::Message* Parse(
- const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err,
+ const TString& query, const TString& queryName, NAST::IErrorCollector& err,
google::protobuf::Arena* arena) = 0;
};
@@ -85,4 +53,5 @@ namespace NSQLTranslation {
};
using TParserFactoryPtr = TIntrusivePtr<IParserFactory>;
+
} // namespace NSQLTranslation
diff --git a/yql/essentials/parser/proto_ast/ya.make b/yql/essentials/parser/proto_ast/ya.make
index 6b7493ae893..6daae38e176 100644
--- a/yql/essentials/parser/proto_ast/ya.make
+++ b/yql/essentials/parser/proto_ast/ya.make
@@ -2,10 +2,7 @@ LIBRARY()
PEERDIR(
contrib/libs/protobuf
-)
-
-SRCS(
- common.cpp
+ yql/essentials/parser/common
)
END()
diff --git a/yql/essentials/sql/v0/context.cpp b/yql/essentials/sql/v0/context.cpp
index 5d461ee5f33..01f2c1e71b3 100644
--- a/yql/essentials/sql/v0/context.cpp
+++ b/yql/essentials/sql/v0/context.cpp
@@ -133,7 +133,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP
}
if (Settings.MaxErrors <= Issues.Size()) {
- ythrow NProtoAST::TTooManyErrors() << "Too many issues";
+ ythrow NAST::TTooManyErrors() << "Too many issues";
}
}
diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp
index b72c673b0ec..569ae375ebc 100644
--- a/yql/essentials/sql/v1/context.cpp
+++ b/yql/essentials/sql/v1/context.cpp
@@ -248,7 +248,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP
}
if (Settings.MaxErrors <= Issues.Size()) {
- ythrow NProtoAST::TTooManyErrors() << "Too many issues";
+ ythrow NAST::TTooManyErrors() << "Too many issues";
}
}
diff --git a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp
index 5add4fc6bfb..e3f63c4b65a 100644
--- a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp
@@ -1,8 +1,10 @@
#include "lexer.h"
+
#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
-#include <yql/essentials/public/issue/yql_issue.h>
-#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h>
+#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
+
+#include <yql/essentials/public/issue/yql_issue.h>
namespace NSQLTranslationV1 {
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp
new file mode 100644
index 00000000000..d1cfb228eda
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp
@@ -0,0 +1,39 @@
+#include "lexer.h"
+
+#include <yql/essentials/parser/common/issue.h>
+#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h>
+
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+
+#include <yql/essentials/public/issue/yql_issue.h>
+
+namespace NSQLTranslationV1 {
+
+    namespace {
+
+        // ILexer implementation backed by the generated ANTLR4 lexer
+        // NALADefaultAntlr4::SQLv1Antlr4Lexer.
+        class TLexer: public NSQLTranslation::ILexer {
+        public:
+            // Tokenizes `query`, passing each token to `onNextToken`.
+            // Issues raised during this call are appended to `issues`.
+            // Returns false iff at least one of them has error severity.
+            bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final {
+                // Collect into a local list so the result depends only on
+                // this call, not on issues the caller already had.
+                NYql::TIssues newIssues;
+                NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName);
+                NAST::TLexerTokensCollector4<NALADefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName);
+                tokensCollector.CollectTokens(collector, onNextToken);
+                issues.AddIssues(newIssues);
+
+                // The predicate takes the issue by const reference; a
+                // by-value parameter would copy every issue just to read
+                // its severity.
+                return !AnyOf(newIssues.begin(), newIssues.end(), [](const auto& issue) {
+                    return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR;
+                });
+            }
+        };
+
+        // Factory that creates a fresh TLexer on every request.
+        class TFactory: public NSQLTranslation::ILexerFactory {
+        public:
+            THolder<NSQLTranslation::ILexer> MakeLexer() const final {
+                return MakeHolder<TLexer>();
+            }
+        };
+
+    } // namespace
+
+    // Returns the factory for the lexer defined above.
+    NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory() {
+        return MakeIntrusive<TFactory>();
+    }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h
new file mode 100644
index 00000000000..21c4651daca
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+namespace NSQLTranslationV1 {
+
+ NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory(); // Factory whose lexers tokenize with NALADefaultAntlr4::SQLv1Antlr4Lexer.
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make
new file mode 100644
index 00000000000..c638733caef
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/public/issue
+ yql/essentials/parser/common/antlr4
+ yql/essentials/parser/antlr_ast/gen/v1_antlr4
+)
+
+SRCS(
+ lexer.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp
new file mode 100644
index 00000000000..b1df2ac506a
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp
@@ -0,0 +1,39 @@
+#include "lexer.h"
+
+#include <yql/essentials/parser/common/issue.h>
+#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h>
+
+#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+
+#include <yql/essentials/public/issue/yql_issue.h>
+
+namespace NSQLTranslationV1 {
+
+    namespace {
+
+        // ILexer implementation backed by the generated ANTLR4 lexer
+        // NALAAnsiAntlr4::SQLv1Antlr4Lexer.
+        class TLexer: public NSQLTranslation::ILexer {
+        public:
+            // Tokenizes `query`, passing each token to `onNextToken`.
+            // Issues raised during this call are appended to `issues`.
+            // Returns false iff at least one of them has error severity.
+            bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final {
+                // Collect into a local list so the result depends only on
+                // this call, not on issues the caller already had.
+                NYql::TIssues newIssues;
+                NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName);
+                NAST::TLexerTokensCollector4<NALAAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName);
+                tokensCollector.CollectTokens(collector, onNextToken);
+                issues.AddIssues(newIssues);
+
+                // The predicate takes the issue by const reference; a
+                // by-value parameter would copy every issue just to read
+                // its severity.
+                return !AnyOf(newIssues.begin(), newIssues.end(), [](const auto& issue) {
+                    return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR;
+                });
+            }
+        };
+
+        // Factory that creates a fresh TLexer on every request.
+        class TFactory: public NSQLTranslation::ILexerFactory {
+        public:
+            THolder<NSQLTranslation::ILexer> MakeLexer() const final {
+                return MakeHolder<TLexer>();
+            }
+        };
+
+    } // namespace
+
+    // Returns the factory for the lexer defined above.
+    NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory() {
+        return MakeIntrusive<TFactory>();
+    }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h
new file mode 100644
index 00000000000..232e3fec749
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+namespace NSQLTranslationV1 {
+
+ NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory(); // Factory whose lexers tokenize with NALAAnsiAntlr4::SQLv1Antlr4Lexer.
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make
new file mode 100644
index 00000000000..161e2b77f03
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/public/issue
+ yql/essentials/parser/common/antlr4
+ yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4
+)
+
+SRCS(
+ lexer.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
index 8c94fff7e1a..2b5da9ddd53 100644
--- a/yql/essentials/sql/v1/lexer/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -29,8 +29,8 @@ using NSQLTranslation::MakeDummyLexerFactory;
class TV1Lexer : public ILexer {
public:
- explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4)
- : Factory(GetFactory(lexers, ansi, antlr4))
+ explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure)
+ : Factory(GetFactory(lexers, ansi, antlr4, pure))
{
}
@@ -42,31 +42,41 @@ public:
}
private:
- static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4) {
- if (!ansi && !antlr4) {
+ static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false) {
+ if (!ansi && !antlr4 && !pure) {
if (lexers.Antlr3) {
return lexers.Antlr3;
}
-
return MakeDummyLexerFactory("antlr3");
- } else if (ansi && !antlr4) {
+ } else if (ansi && !antlr4 && !pure) {
if (lexers.Antlr3Ansi) {
return lexers.Antlr3Ansi;
}
-
return MakeDummyLexerFactory("antlr3_ansi");
- } else if (!ansi && antlr4) {
+ } else if (!ansi && antlr4 && !pure) {
if (lexers.Antlr4) {
return lexers.Antlr4;
}
-
return MakeDummyLexerFactory("antlr4");
- } else {
+ } else if (ansi && antlr4 && !pure) {
if (lexers.Antlr4Ansi) {
return lexers.Antlr4Ansi;
}
-
return MakeDummyLexerFactory("antlr4_ansi");
+ } else if (!ansi && antlr4 && pure) {
+ if (lexers.Antlr4Pure) {
+ return lexers.Antlr4Pure;
+ }
+ return MakeDummyLexerFactory("antlr4_pure");
+ } else if (ansi && antlr4 && pure) {
+ if (lexers.Antlr4PureAnsi) {
+ return lexers.Antlr4PureAnsi;
+ }
+ return MakeDummyLexerFactory("antlr4_pure_ansi");
+ } else if (!ansi && !antlr4 && pure) {
+ return MakeDummyLexerFactory("antlr3_pure");
+ } else {
+ return MakeDummyLexerFactory("antlr3_pure_ansi");
}
}
@@ -76,8 +86,8 @@ private:
} // namespace
-NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4) {
- return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4));
+NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) {
+ return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4, pure));
}
bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) {
diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h
index 2a3af96055e..857681ae51f 100644
--- a/yql/essentials/sql/v1/lexer/lexer.h
+++ b/yql/essentials/sql/v1/lexer/lexer.h
@@ -9,9 +9,11 @@ struct TLexers {
NSQLTranslation::TLexerFactoryPtr Antlr3Ansi;
NSQLTranslation::TLexerFactoryPtr Antlr4;
NSQLTranslation::TLexerFactoryPtr Antlr4Ansi;
+ NSQLTranslation::TLexerFactoryPtr Antlr4Pure;
+ NSQLTranslation::TLexerFactoryPtr Antlr4PureAnsi;
};
-NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4);
+NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false);
// "Probably" because YQL keyword can be an identifier
// depending on a query context. For example
diff --git a/yql/essentials/sql/v1/lexer/lexer_ut.cpp b/yql/essentials/sql/v1/lexer/lexer_ut.cpp
index 2f0c8bb8e2b..3ad01f631b6 100644
--- a/yql/essentials/sql/v1/lexer/lexer_ut.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer_ut.cpp
@@ -2,8 +2,10 @@
#include <yql/essentials/core/issue/yql_issue.h>
#include <yql/essentials/sql/settings/translation_settings.h>
+
#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h>
#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
#include <library/cpp/testing/unittest/registar.h>
@@ -79,16 +81,21 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) {
NSQLTranslationV1::TLexers lexers;
lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory();
lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false);
auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true);
+ auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true);
for (const auto& query : queriesUtf8) {
auto [tokens3, issues3] = Tokenize(lexer3, query);
auto [tokens4, issues4] = Tokenize(lexer4, query);
+ auto [tokens4p, issues4p] = Tokenize(lexer4p, query);
AssertEquivialent(tokens3, tokens4);
+ AssertEquivialent(tokens3, tokens4p);
UNIT_ASSERT(issues3.Empty());
UNIT_ASSERT(issues4.Empty());
+ UNIT_ASSERT(issues4p.Empty());
}
}
@@ -160,13 +167,16 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) {
auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false);
auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true);
+ auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true);
for (const auto& query : InvalidQueries()) {
auto issues3 = GetIssueMessages(lexer3, query);
auto issues4 = GetIssueMessages(lexer4, query);
+ auto issues4p = GetIssueMessages(lexer4p, query);
UNIT_ASSERT(!issues3.empty());
UNIT_ASSERT(!issues4.empty());
+ UNIT_ASSERT(!issues4p.empty());
}
}
diff --git a/yql/essentials/sql/v1/lexer/ut/ya.make b/yql/essentials/sql/v1/lexer/ut/ya.make
index 70503c127e8..c50c8cd7277 100644
--- a/yql/essentials/sql/v1/lexer/ut/ya.make
+++ b/yql/essentials/sql/v1/lexer/ut/ya.make
@@ -5,6 +5,7 @@ PEERDIR(
yql/essentials/parser/lexer_common
yql/essentials/sql/v1/lexer/antlr3
yql/essentials/sql/v1/lexer/antlr4
+ yql/essentials/sql/v1/lexer/antlr4_pure
)
SRCS(
diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make
index 8a3f00d36e1..c38b56c9273 100644
--- a/yql/essentials/sql/v1/lexer/ya.make
+++ b/yql/essentials/sql/v1/lexer/ya.make
@@ -20,6 +20,8 @@ RECURSE(
antlr3_ansi
antlr4
antlr4_ansi
+ antlr4_pure
+ antlr4_pure_ansi
)
RECURSE_FOR_TESTS(
diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
index 5651345215c..5fbef92ca52 100644
--- a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
+++ b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
@@ -27,7 +27,7 @@ namespace NSQLTranslationV1 {
namespace {
-void ReportError(NProtoAST::IErrorCollector& err, const TString& name) {
+void ReportError(NAST::IErrorCollector& err, const TString& name) {
err.Error(0, 0, TStringBuilder() << "Parser " << name << " is not supported");
}
@@ -39,7 +39,7 @@ google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query,
return SqlAST(parsers, query, queryName, collector, ansiLexer, anlr4Parser, arena);
}
-google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err,
+google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NAST::IErrorCollector& err,
bool ansiLexer, bool anlr4Parser, google::protobuf::Arena* arena) {
YQL_ENSURE(arena);
#if defined(_tsan_enabled_)
diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.h b/yql/essentials/sql/v1/proto_parser/proto_parser.h
index b2002d875b8..39c41771410 100644
--- a/yql/essentials/sql/v1/proto_parser/proto_parser.h
+++ b/yql/essentials/sql/v1/proto_parser/proto_parser.h
@@ -23,5 +23,5 @@ namespace NSQLTranslationV1 {
google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName,
NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName,
- NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
+ NAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp
index 7201c4b0693..3d8951cb83e 100644
--- a/yql/essentials/sql/v1/sql.cpp
+++ b/yql/essentials/sql/v1/sql.cpp
@@ -20,7 +20,7 @@ TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx)
if (node && node->Init(ctx, nullptr)) {
return node->Translate(ctx);
}
- } catch (const NProtoAST::TTooManyErrors&) {
+ } catch (const NAST::TTooManyErrors&) {
// do not add error issue, no room for it
}
@@ -34,7 +34,7 @@ TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core
if (node && node->Init(ctx, nullptr)) {
return node->Translate(ctx);
}
- } catch (const NProtoAST::TTooManyErrors&) {
+ } catch (const NAST::TTooManyErrors&) {
// do not add error issue, no room for it
}