summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql
diff options
context:
space:
mode:
authorVictor Smirnov <[email protected]>2025-03-19 13:03:56 +0300
committerrobot-piglet <[email protected]>2025-03-19 13:18:48 +0300
commit28b29535ce7b21a3dde60b485c98f66f8c08f882 (patch)
treeb831ec57225a22c3241a443eccc20af1053fc561 /yql/essentials/sql
parent6c4b9a2b45127baabf73cdcb6323f3e3e09e5440 (diff)
YQL-19616 Implement ILexer via antlr_ast
- [x] Added `antlr_ast/antlr4` module and moved `TLexerTokensCollector4` there from `proto_ast/antlr4`. - [x] Moved stuff around back and forth. Ready for a review. --- Co-authored-by: vityaman [[email protected]] Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1128 commit_hash:e08785c3408ef813505bdc7511560e9536f4ab79
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r--yql/essentials/sql/v0/context.cpp2
-rw-r--r--yql/essentials/sql/v1/context.cpp2
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4/lexer.cpp6
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp39
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h9
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure/ya.make13
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp39
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h9
-rw-r--r--yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make13
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.cpp36
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.h4
-rw-r--r--yql/essentials/sql/v1/lexer/lexer_ut.cpp10
-rw-r--r--yql/essentials/sql/v1/lexer/ut/ya.make1
-rw-r--r--yql/essentials/sql/v1/lexer/ya.make2
-rw-r--r--yql/essentials/sql/v1/proto_parser/proto_parser.cpp4
-rw-r--r--yql/essentials/sql/v1/proto_parser/proto_parser.h2
-rw-r--r--yql/essentials/sql/v1/sql.cpp4
17 files changed, 172 insertions, 23 deletions
diff --git a/yql/essentials/sql/v0/context.cpp b/yql/essentials/sql/v0/context.cpp
index 5d461ee5f33..01f2c1e71b3 100644
--- a/yql/essentials/sql/v0/context.cpp
+++ b/yql/essentials/sql/v0/context.cpp
@@ -133,7 +133,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP
}
if (Settings.MaxErrors <= Issues.Size()) {
- ythrow NProtoAST::TTooManyErrors() << "Too many issues";
+ ythrow NAST::TTooManyErrors() << "Too many issues";
}
}
diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp
index b72c673b0ec..569ae375ebc 100644
--- a/yql/essentials/sql/v1/context.cpp
+++ b/yql/essentials/sql/v1/context.cpp
@@ -248,7 +248,7 @@ IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TP
}
if (Settings.MaxErrors <= Issues.Size()) {
- ythrow NProtoAST::TTooManyErrors() << "Too many issues";
+ ythrow NAST::TTooManyErrors() << "Too many issues";
}
}
diff --git a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp
index 5add4fc6bfb..e3f63c4b65a 100644
--- a/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/antlr4/lexer.cpp
@@ -1,8 +1,10 @@
#include "lexer.h"
+
#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
-#include <yql/essentials/public/issue/yql_issue.h>
-#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h>
+#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
+
+#include <yql/essentials/public/issue/yql_issue.h>
namespace NSQLTranslationV1 {
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp
new file mode 100644
index 00000000000..d1cfb228eda
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.cpp
@@ -0,0 +1,47 @@
+#include "lexer.h"
+
+#include <yql/essentials/parser/common/issue.h>
+#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h>
+
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+
+#include <yql/essentials/public/issue/yql_issue.h>
+
+namespace NSQLTranslationV1 {
+
+    namespace {
+
+        // ILexer implementation that tokenizes with the SQLv1Antlr4Lexer
+        // generated under antlr_ast (NALADefaultAntlr4 namespace).
+        class TLexer: public NSQLTranslation::ILexer {
+        public:
+            // Runs the tokens collector over `query`, passing `onNextToken` through to it.
+            // Issues raised by this call are gathered separately and then appended to
+            // `issues`. Returns false iff at least one of the new issues is an error.
+            bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final {
+                NYql::TIssues newIssues;
+                NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName);
+                NAST::TLexerTokensCollector4<NALADefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName);
+                tokensCollector.CollectTokens(collector, onNextToken);
+                issues.AddIssues(newIssues);
+                // The predicate takes the issue by const reference so that no issue is copied.
+                return !AnyOf(newIssues.begin(), newIssues.end(), [](const auto& issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; });
+            }
+        };
+
+        // Factory that hands out instances of the lexer above.
+        class TFactory: public NSQLTranslation::ILexerFactory {
+        public:
+            THolder<NSQLTranslation::ILexer> MakeLexer() const final {
+                return MakeHolder<TLexer>();
+            }
+        };
+
+    } // namespace
+
+    // Creates the factory for the antlr_ast-based ANTLR4 lexer.
+    NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory() {
+        return MakeIntrusive<TFactory>();
+    }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h
new file mode 100644
index 00000000000..21c4651daca
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+namespace NSQLTranslationV1 {
+
+ NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureLexerFactory();
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make
new file mode 100644
index 00000000000..c638733caef
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/public/issue
+ yql/essentials/parser/common/antlr4
+ yql/essentials/parser/antlr_ast/gen/v1_antlr4
+)
+
+SRCS(
+ lexer.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp
new file mode 100644
index 00000000000..b1df2ac506a
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.cpp
@@ -0,0 +1,47 @@
+#include "lexer.h"
+
+#include <yql/essentials/parser/common/issue.h>
+#include <yql/essentials/parser/common/antlr4/lexer_tokens_collector.h>
+
+#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+
+#include <yql/essentials/public/issue/yql_issue.h>
+
+namespace NSQLTranslationV1 {
+
+    namespace {
+
+        // ILexer implementation that tokenizes with the ANSI variant of SQLv1Antlr4Lexer
+        // generated under antlr_ast (NALAAnsiAntlr4 namespace).
+        class TLexer: public NSQLTranslation::ILexer {
+        public:
+            // Runs the tokens collector over `query`, passing `onNextToken` through to it.
+            // Issues raised by this call are gathered separately and then appended to
+            // `issues`. Returns false iff at least one of the new issues is an error.
+            bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) final {
+                NYql::TIssues newIssues;
+                NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, queryName);
+                NAST::TLexerTokensCollector4<NALAAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName);
+                tokensCollector.CollectTokens(collector, onNextToken);
+                issues.AddIssues(newIssues);
+                // The predicate takes the issue by const reference so that no issue is copied.
+                return !AnyOf(newIssues.begin(), newIssues.end(), [](const auto& issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; });
+            }
+        };
+
+        // Factory that hands out instances of the lexer above.
+        class TFactory: public NSQLTranslation::ILexerFactory {
+        public:
+            THolder<NSQLTranslation::ILexer> MakeLexer() const final {
+                return MakeHolder<TLexer>();
+            }
+        };
+
+    } // namespace
+
+    // Creates the factory for the antlr_ast-based ANTLR4 lexer using the ANSI grammar.
+    NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory() {
+        return MakeIntrusive<TFactory>();
+    }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h
new file mode 100644
index 00000000000..232e3fec749
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+namespace NSQLTranslationV1 {
+
+ NSQLTranslation::TLexerFactoryPtr MakeAntlr4PureAnsiLexerFactory();
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make
new file mode 100644
index 00000000000..161e2b77f03
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/antlr4_pure_ansi/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/public/issue
+ yql/essentials/parser/common/antlr4
+ yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4
+)
+
+SRCS(
+ lexer.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
index 8c94fff7e1a..2b5da9ddd53 100644
--- a/yql/essentials/sql/v1/lexer/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -29,8 +29,8 @@ using NSQLTranslation::MakeDummyLexerFactory;
class TV1Lexer : public ILexer {
public:
- explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4)
- : Factory(GetFactory(lexers, ansi, antlr4))
+ explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure)
+ : Factory(GetFactory(lexers, ansi, antlr4, pure))
{
}
@@ -42,31 +42,41 @@ public:
}
private:
- static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4) {
- if (!ansi && !antlr4) {
+ static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false) {
+ if (!ansi && !antlr4 && !pure) {
if (lexers.Antlr3) {
return lexers.Antlr3;
}
-
return MakeDummyLexerFactory("antlr3");
- } else if (ansi && !antlr4) {
+ } else if (ansi && !antlr4 && !pure) {
if (lexers.Antlr3Ansi) {
return lexers.Antlr3Ansi;
}
-
return MakeDummyLexerFactory("antlr3_ansi");
- } else if (!ansi && antlr4) {
+ } else if (!ansi && antlr4 && !pure) {
if (lexers.Antlr4) {
return lexers.Antlr4;
}
-
return MakeDummyLexerFactory("antlr4");
- } else {
+ } else if (ansi && antlr4 && !pure) {
if (lexers.Antlr4Ansi) {
return lexers.Antlr4Ansi;
}
-
return MakeDummyLexerFactory("antlr4_ansi");
+ } else if (!ansi && antlr4 && pure) {
+ if (lexers.Antlr4Pure) {
+ return lexers.Antlr4Pure;
+ }
+ return MakeDummyLexerFactory("antlr4_pure");
+ } else if (ansi && antlr4 && pure) {
+ if (lexers.Antlr4PureAnsi) {
+ return lexers.Antlr4PureAnsi;
+ }
+ return MakeDummyLexerFactory("antlr4_pure_ansi");
+ } else if (!ansi && !antlr4 && pure) {
+ return MakeDummyLexerFactory("antlr3_pure");
+ } else {
+ return MakeDummyLexerFactory("antlr3_pure_ansi");
}
}
@@ -76,8 +86,8 @@ private:
} // namespace
-NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4) {
- return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4));
+NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) {
+ return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4, pure));
}
bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) {
diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h
index 2a3af96055e..857681ae51f 100644
--- a/yql/essentials/sql/v1/lexer/lexer.h
+++ b/yql/essentials/sql/v1/lexer/lexer.h
@@ -9,9 +9,11 @@ struct TLexers {
NSQLTranslation::TLexerFactoryPtr Antlr3Ansi;
NSQLTranslation::TLexerFactoryPtr Antlr4;
NSQLTranslation::TLexerFactoryPtr Antlr4Ansi;
+ NSQLTranslation::TLexerFactoryPtr Antlr4Pure;
+ NSQLTranslation::TLexerFactoryPtr Antlr4PureAnsi;
};
-NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4);
+NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false);
// "Probably" because YQL keyword can be an identifier
// depending on a query context. For example
diff --git a/yql/essentials/sql/v1/lexer/lexer_ut.cpp b/yql/essentials/sql/v1/lexer/lexer_ut.cpp
index 2f0c8bb8e2b..3ad01f631b6 100644
--- a/yql/essentials/sql/v1/lexer/lexer_ut.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer_ut.cpp
@@ -2,8 +2,10 @@
#include <yql/essentials/core/issue/yql_issue.h>
#include <yql/essentials/sql/settings/translation_settings.h>
+
#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h>
#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
#include <library/cpp/testing/unittest/registar.h>
@@ -79,16 +81,21 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) {
NSQLTranslationV1::TLexers lexers;
lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory();
lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false);
auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true);
+ auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true);
for (const auto& query : queriesUtf8) {
auto [tokens3, issues3] = Tokenize(lexer3, query);
auto [tokens4, issues4] = Tokenize(lexer4, query);
+ auto [tokens4p, issues4p] = Tokenize(lexer4p, query);
AssertEquivialent(tokens3, tokens4);
+ AssertEquivialent(tokens3, tokens4p);
UNIT_ASSERT(issues3.Empty());
UNIT_ASSERT(issues4.Empty());
+ UNIT_ASSERT(issues4p.Empty());
}
}
@@ -160,13 +167,16 @@ Y_UNIT_TEST_SUITE(SQLv1Lexer) {
auto lexer3 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ false);
auto lexer4 = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true);
+ auto lexer4p = MakeLexer(lexers, /* ansi = */ false, /* antlr4 = */ true, /* pure = */ true);
for (const auto& query : InvalidQueries()) {
auto issues3 = GetIssueMessages(lexer3, query);
auto issues4 = GetIssueMessages(lexer4, query);
+ auto issues4p = GetIssueMessages(lexer4p, query);
UNIT_ASSERT(!issues3.empty());
UNIT_ASSERT(!issues4.empty());
+ UNIT_ASSERT(!issues4p.empty());
}
}
diff --git a/yql/essentials/sql/v1/lexer/ut/ya.make b/yql/essentials/sql/v1/lexer/ut/ya.make
index 70503c127e8..c50c8cd7277 100644
--- a/yql/essentials/sql/v1/lexer/ut/ya.make
+++ b/yql/essentials/sql/v1/lexer/ut/ya.make
@@ -5,6 +5,7 @@ PEERDIR(
yql/essentials/parser/lexer_common
yql/essentials/sql/v1/lexer/antlr3
yql/essentials/sql/v1/lexer/antlr4
+ yql/essentials/sql/v1/lexer/antlr4_pure
)
SRCS(
diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make
index 8a3f00d36e1..c38b56c9273 100644
--- a/yql/essentials/sql/v1/lexer/ya.make
+++ b/yql/essentials/sql/v1/lexer/ya.make
@@ -20,6 +20,8 @@ RECURSE(
antlr3_ansi
antlr4
antlr4_ansi
+ antlr4_pure
+ antlr4_pure_ansi
)
RECURSE_FOR_TESTS(
diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
index 5651345215c..5fbef92ca52 100644
--- a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
+++ b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
@@ -27,7 +27,7 @@ namespace NSQLTranslationV1 {
namespace {
-void ReportError(NProtoAST::IErrorCollector& err, const TString& name) {
+void ReportError(NAST::IErrorCollector& err, const TString& name) {
err.Error(0, 0, TStringBuilder() << "Parser " << name << " is not supported");
}
@@ -39,7 +39,7 @@ google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query,
return SqlAST(parsers, query, queryName, collector, ansiLexer, anlr4Parser, arena);
}
-google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err,
+google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName, NAST::IErrorCollector& err,
bool ansiLexer, bool anlr4Parser, google::protobuf::Arena* arena) {
YQL_ENSURE(arena);
#if defined(_tsan_enabled_)
diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.h b/yql/essentials/sql/v1/proto_parser/proto_parser.h
index b2002d875b8..39c41771410 100644
--- a/yql/essentials/sql/v1/proto_parser/proto_parser.h
+++ b/yql/essentials/sql/v1/proto_parser/proto_parser.h
@@ -23,5 +23,5 @@ namespace NSQLTranslationV1 {
google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName,
NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
google::protobuf::Message* SqlAST(const TParsers& parsers, const TString& query, const TString& queryName,
- NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
+ NAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp
index 7201c4b0693..3d8951cb83e 100644
--- a/yql/essentials/sql/v1/sql.cpp
+++ b/yql/essentials/sql/v1/sql.cpp
@@ -20,7 +20,7 @@ TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx)
if (node && node->Init(ctx, nullptr)) {
return node->Translate(ctx);
}
- } catch (const NProtoAST::TTooManyErrors&) {
+ } catch (const NAST::TTooManyErrors&) {
// do not add error issue, no room for it
}
@@ -34,7 +34,7 @@ TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core
if (node && node->Init(ctx, nullptr)) {
return node->Translate(ctx);
}
- } catch (const NProtoAST::TTooManyErrors&) {
+ } catch (const NAST::TTooManyErrors&) {
// do not add error issue, no room for it
}