summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/lexer/lexer.cpp
diff options
context:
space:
mode:
authorvityaman <[email protected]>2025-03-28 18:29:24 +0300
committerrobot-piglet <[email protected]>2025-03-28 18:50:04 +0300
commit60b99f11bcb2386c2a1c36ffd2e96e69d0105dac (patch)
tree08c15d732484c6accf16658b09ed8f07286a9338 /yql/essentials/sql/v1/lexer/lexer.cpp
parent1e214be59cbf130bee433c422b42f16148e5acff (diff)
YQL-19616 Convert YQL lexer grammar to regexes
- [x] Parse YQL grammar to extract lexer grammar into `TLexerGrammar`.
- [x] Translate `TLexerGrammar` into regexes.
- [x] Implement a regex-based lexer, `TRegexLexer`, to test the validity of the generated regexes.
- [x] Test on `Default` syntax mode.
- [x] Test on `ANSI` syntax mode.

---

- Related to https://github.com/ydb-platform/ydb/issues/15129
- Requirement for https://github.com/ytsaurus/ytsaurus/pull/1112

---

Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1127
commit_hash:03ffffe81cdafe7f93a4d3fd9a3212fe67f1c72d
Diffstat (limited to 'yql/essentials/sql/v1/lexer/lexer.cpp')
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.cpp95
1 files changed, 57 insertions, 38 deletions
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
index 5621cc65d7b..88ced55ccf4 100644
--- a/yql/essentials/sql/v1/lexer/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -11,6 +11,7 @@
#include <util/string/ascii.h>
#include <util/string/builder.h>
#include <util/string/strip.h>
+#include <util/string/join.h>
#if defined(_tsan_enabled_)
#include <util/system/mutex.h>
@@ -29,8 +30,8 @@ using NSQLTranslation::MakeDummyLexerFactory;
class TV1Lexer : public ILexer {
public:
- explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure)
- : Factory(GetFactory(lexers, ansi, antlr4, pure))
+ explicit TV1Lexer(const TLexers& lexers, bool ansi, bool antlr4, ELexerFlavor flavor)
+ : Factory(GetFactory(lexers, ansi, antlr4, flavor))
{
}
@@ -42,52 +43,70 @@ public:
}
private:
- static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4, bool pure = false) {
- if (!ansi && !antlr4 && !pure) {
- if (lexers.Antlr3) {
- return lexers.Antlr3;
- }
- return MakeDummyLexerFactory("antlr3");
- } else if (ansi && !antlr4 && !pure) {
- if (lexers.Antlr3Ansi) {
- return lexers.Antlr3Ansi;
- }
- return MakeDummyLexerFactory("antlr3_ansi");
- } else if (!ansi && antlr4 && !pure) {
- if (lexers.Antlr4) {
- return lexers.Antlr4;
- }
- return MakeDummyLexerFactory("antlr4");
- } else if (ansi && antlr4 && !pure) {
- if (lexers.Antlr4Ansi) {
- return lexers.Antlr4Ansi;
- }
- return MakeDummyLexerFactory("antlr4_ansi");
- } else if (!ansi && antlr4 && pure) {
- if (lexers.Antlr4Pure) {
- return lexers.Antlr4Pure;
- }
- return MakeDummyLexerFactory("antlr4_pure");
- } else if (ansi && antlr4 && pure) {
- if (lexers.Antlr4PureAnsi) {
- return lexers.Antlr4PureAnsi;
- }
- return MakeDummyLexerFactory("antlr4_pure_ansi");
- } else if (!ansi && !antlr4 && pure) {
- return MakeDummyLexerFactory("antlr3_pure");
+ // Returns the lexer factory selected by (ansi, antlr4, flavor). When
+ // GetMaybeFactory yields a null pointer for that combination, a dummy
+ // factory created from the combination's name (see GetLexerName) is
+ // returned instead, so the result of this function is never the null
+ // pointer produced by the lookup.
+ static NSQLTranslation::TLexerFactoryPtr GetFactory(const TLexers& lexers, bool ansi, bool antlr4, ELexerFlavor flavor) {
+     auto factory = GetMaybeFactory(lexers, ansi, antlr4, flavor);
+     if (!factory) {
+         // The name is only computed on this fallback path.
+         return MakeDummyLexerFactory(GetLexerName(ansi, antlr4, flavor));
+     }
+     return factory;
+ }
+
+ // Looks up the factory stored in `lexers` for the requested combination:
+ //   Default flavor: Antlr3 / Antlr3Ansi / Antlr4 / Antlr4Ansi
+ //   Pure flavor:    Antlr4Pure / Antlr4PureAnsi   (only with antlr4)
+ //   Regex flavor:   Regex / RegexAnsi             (only without antlr4)
+ // Any other combination yields nullptr. The selected member is returned
+ // as-is, so the result may also be null when that member was never set.
+ static NSQLTranslation::TLexerFactoryPtr GetMaybeFactory(const TLexers& lexers, bool ansi, bool antlr4, ELexerFlavor flavor) {
+     switch (flavor) {
+         case ELexerFlavor::Default: {
+             if (antlr4) {
+                 if (ansi) {
+                     return lexers.Antlr4Ansi;
+                 }
+                 return lexers.Antlr4;
+             }
+             if (ansi) {
+                 return lexers.Antlr3Ansi;
+             }
+             return lexers.Antlr3;
+         }
+         case ELexerFlavor::Pure: {
+             // There is no pure lexer slot for antlr3.
+             if (antlr4) {
+                 if (ansi) {
+                     return lexers.Antlr4PureAnsi;
+                 }
+                 return lexers.Antlr4Pure;
+             }
+         } break;
+         case ELexerFlavor::Regex: {
+             // The regex lexer is only selected when antlr4 is not requested.
+             if (!antlr4) {
+                 if (ansi) {
+                     return lexers.RegexAnsi;
+                 }
+                 return lexers.Regex;
+             }
+         } break;
+     }
+     return nullptr;
+ }
+ // Builds the name of the lexer variant described by (ansi, antlr4, flavor).
+ // Up to three parts are joined with '_' in this order:
+ //   1. "antlr4" when antlr4 is set; otherwise "antlr3", except for the
+ //      Regex flavor, which gets no ANTLR prefix;
+ //   2. "pure" or "regex" for the corresponding flavor (nothing for Default);
+ //   3. "ansi" when ansi is set.
+ // Examples: "antlr3", "antlr4_pure_ansi", "regex_ansi".
+ // GetFactory passes this name to MakeDummyLexerFactory when no real factory
+ // is available for the combination.
+ static TString GetLexerName(bool ansi, bool antlr4, ELexerFlavor flavor) {
+     // Elements are deliberately non-const: a vector of const elements is not
+     // supported by the standard allocator requirements.
+     TVector<TStringBuf> parts;
+
+     if (antlr4) {
+         parts.emplace_back("antlr4");
+     } else if (flavor != ELexerFlavor::Regex) {
+         parts.emplace_back("antlr3");
+     }
+
+     switch (flavor) {
+         case ELexerFlavor::Default: {
+         } break;
+         case ELexerFlavor::Pure: {
+             parts.emplace_back("pure");
+         } break;
+         case ELexerFlavor::Regex: {
+             parts.emplace_back("regex");
+         } break;
+     }
+
+     if (ansi) {
+         parts.emplace_back("ansi");
+     }
+
+     return JoinSeq("_", parts);
+ }
+
private:
NSQLTranslation::TLexerFactoryPtr Factory;
};
} // namespace
-NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, bool pure) {
- return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4, pure));
+NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool antlr4, ELexerFlavor flavor) {
+ return NSQLTranslation::ILexer::TPtr(new TV1Lexer(lexers, ansi, antlr4, flavor));
}
bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) {