summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1/format
diff options
context:
space:
mode:
author: vvvv <[email protected]> 2025-02-19 17:28:26 +0300
committer: vvvv <[email protected]> 2025-02-19 17:46:38 +0300
commit: 52daccf61e2e827114cfb3372071cddaec7974ba (patch)
tree: 07404f6ec241c8a2b96a4da07dc0f0d0c247799c /yql/essentials/sql/v1/format
parent: ca4b5f28703b0d06599c94f3eacc5d6c498c31d5 (diff)
YQL-19594 Explicit lexers & parsers
commit_hash:6be543b7c5bff6ee474ee606c920197fb2569767
Diffstat (limited to 'yql/essentials/sql/v1/format')
-rw-r--r-- yql/essentials/sql/v1/format/sql_format.cpp | 38
-rw-r--r-- yql/essentials/sql/v1/format/sql_format.h | 12
-rw-r--r-- yql/essentials/sql/v1/format/sql_format_ut.cpp | 23
-rw-r--r-- yql/essentials/sql/v1/format/sql_format_ut.h | 2
-rw-r--r-- yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp | 23
-rw-r--r-- yql/essentials/sql/v1/format/ut/ya.make | 7
-rw-r--r-- yql/essentials/sql/v1/format/ut_antlr4/ya.make | 8
7 files changed, 93 insertions, 20 deletions
diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp
index c6a9aa1cb78..cfb6fcde71d 100644
--- a/yql/essentials/sql/v1/format/sql_format.cpp
+++ b/yql/essentials/sql/v1/format/sql_format.cpp
@@ -3,9 +3,6 @@
#include <yql/essentials/parser/lexer_common/lexer.h>
#include <yql/essentials/core/sql_types/simple_types.h>
-#include <yql/essentials/sql/v1/lexer/lexer.h>
-#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
-
#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
#include <library/cpp/protobuf/util/simple_reflection.h>
@@ -3110,8 +3107,12 @@ TStaticData::TStaticData()
class TSqlFormatter : public NSQLFormat::ISqlFormatter {
public:
- TSqlFormatter(const NSQLTranslation::TTranslationSettings& settings)
- : Settings(settings)
+ TSqlFormatter(const NSQLTranslationV1::TLexers& lexers,
+ const NSQLTranslationV1::TParsers& parsers,
+ const NSQLTranslation::TTranslationSettings& settings)
+ : Lexers(lexers)
+ , Parsers(parsers)
+ , Settings(settings)
{}
bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues, EFormatMode mode) override {
@@ -3126,7 +3127,7 @@ public:
}
if (mode == EFormatMode::Obfuscate) {
- auto message = NSQLTranslationV1::SqlAST(query, parsedSettings.File, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena);
+ auto message = NSQLTranslationV1::SqlAST(Parsers, query, parsedSettings.File, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.Arena);
if (!message) {
return false;
}
@@ -3135,7 +3136,7 @@ public:
return Format(visitor.Process(*message), formattedQuery, issues, EFormatMode::Pretty);
}
- auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser);
+ auto lexer = NSQLTranslationV1::MakeLexer(Lexers, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser);
TVector<TString> statements;
if (!NSQLTranslationV1::SplitQueryToStatements(query, lexer, statements, issues, parsedSettings.File)) {
return false;
@@ -3161,7 +3162,7 @@ public:
}
NYql::TIssues parserIssues;
- auto message = NSQLTranslationV1::SqlAST(currentQuery, parsedSettings.File, parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena);
+ auto message = NSQLTranslationV1::SqlAST(Parsers, currentQuery, parsedSettings.File, parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.Arena);
if (!message) {
finalFormattedQuery << currentQuery;
if (!currentQuery.EndsWith("\n")) {
@@ -3209,23 +3210,32 @@ public:
}
private:
+ const NSQLTranslationV1::TLexers Lexers;
+ const NSQLTranslationV1::TParsers Parsers;
const NSQLTranslation::TTranslationSettings Settings;
};
}
+ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslationV1::TLexers& lexers,
+ const NSQLTranslationV1::TParsers& parsers,
+ const NSQLTranslation::TTranslationSettings& settings) {
+ return ISqlFormatter::TPtr(new TSqlFormatter(lexers, parsers, settings));
+}
+
ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings) {
- return ISqlFormatter::TPtr(new TSqlFormatter(settings));
+ return MakeSqlFormatter(NSQLTranslationV1::MakeAllLexers(), NSQLTranslationV1::MakeAllParsers(), settings);
}
-TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
+TString MutateQuery(const NSQLTranslationV1::TLexers& lexers,
+ const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
auto parsedSettings = settings;
NYql::TIssues issues;
if (!NSQLTranslation::ParseTranslationSettings(query, parsedSettings, issues)) {
throw yexception() << issues.ToString();
}
- auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser);
+ auto lexer = NSQLTranslationV1::MakeLexer(lexers, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser);
TVector<NSQLTranslation::TParsedToken> allTokens;
auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
if (token.Name != "EOF") {
@@ -3248,13 +3258,15 @@ TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSet
return newQueryBuilder;
}
-bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error) {
+bool SqlFormatSimple(const NSQLTranslationV1::TLexers& lexers,
+ const NSQLTranslationV1::TParsers& parsers,
+ const TString& query, TString& formattedQuery, TString& error) {
try {
google::protobuf::Arena arena;
NSQLTranslation::TTranslationSettings settings;
settings.Arena = &arena;
- auto formatter = MakeSqlFormatter(settings);
+ auto formatter = MakeSqlFormatter(lexers, parsers, settings);
NYql::TIssues issues;
const bool result = formatter->Format(query, formattedQuery, issues);
if (!result) {
diff --git a/yql/essentials/sql/v1/format/sql_format.h b/yql/essentials/sql/v1/format/sql_format.h
index 6944a730710..3233f2031b8 100644
--- a/yql/essentials/sql/v1/format/sql_format.h
+++ b/yql/essentials/sql/v1/format/sql_format.h
@@ -2,6 +2,8 @@
#include <yql/essentials/public/issue/yql_issue.h>
#include <yql/essentials/sql/settings/translation_settings.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
#include <util/generic/string.h>
@@ -23,12 +25,18 @@ public:
virtual ~ISqlFormatter() = default;
};
+//FIXME remove
ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings = {});
+ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslationV1::TLexers& lexers,
+ const NSQLTranslationV1::TParsers& parsers,
+ const NSQLTranslation::TTranslationSettings& settings = {});
+
// insert spaces and comments between each tokens
-TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings = {});
+TString MutateQuery(const NSQLTranslationV1::TLexers& lexers, const TString& query, const NSQLTranslation::TTranslationSettings& settings = {});
-bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error);
+bool SqlFormatSimple(const NSQLTranslationV1::TLexers& lexers,
+ const NSQLTranslationV1::TParsers& parsers, const TString& query, TString& formattedQuery, TString& error);
THashSet<TString> GetKeywords();
diff --git a/yql/essentials/sql/v1/format/sql_format_ut.cpp b/yql/essentials/sql/v1/format/sql_format_ut.cpp
index e721f1fae6e..8714ee2d7c9 100644
--- a/yql/essentials/sql/v1/format/sql_format_ut.cpp
+++ b/yql/essentials/sql/v1/format/sql_format_ut.cpp
@@ -1,6 +1,10 @@
#include <library/cpp/testing/unittest/registar.h>
#include "sql_format.h"
+#include <yql/essentials/sql/v1/lexer/antlr3/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr3/proto_parser.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr3_ansi/proto_parser.h>
#include <google/protobuf/arena.h>
#include <util/string/subst.h>
@@ -12,13 +16,25 @@ using TCases = TVector<std::pair<TString, TString>>;
struct TSetup {
TSetup(bool ansiLexer = false) {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory();
+ lexers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiLexerFactory();
+ NSQLTranslationV1::TParsers parsers;
+ parsers.Antlr3 = NSQLTranslationV1::MakeAntlr3ParserFactory();
+ parsers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiParserFactory();
+
NSQLTranslation::TTranslationSettings settings;
+ settings.Antlr4Parser = false;
settings.Arena = &Arena;
settings.AnsiLexer = ansiLexer;
- Formatter = NSQLFormat::MakeSqlFormatter(settings);
+ Formatter = NSQLFormat::MakeSqlFormatter(lexers, parsers, settings);
}
void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory();
+ lexers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiLexerFactory();
+
for (const auto& c : cases) {
NYql::TIssues issues;
TString formatted;
@@ -33,8 +49,11 @@ struct TSetup {
UNIT_ASSERT_C(res2, issues.ToString());
UNIT_ASSERT_NO_DIFF(formatted, formatted2);
+
if (mode == NSQLFormat::EFormatMode::Pretty) {
- auto mutatedQuery = NSQLFormat::MutateQuery(c.first);
+ NSQLTranslation::TTranslationSettings settings;
+ settings.Antlr4Parser = false;
+ auto mutatedQuery = NSQLFormat::MutateQuery(lexers, c.first, settings);
auto res3 = Formatter->Format(mutatedQuery, formatted, issues);
UNIT_ASSERT_C(res3, issues.ToString());
}
diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h
index ac4a305d981..d92393008f0 100644
--- a/yql/essentials/sql/v1/format/sql_format_ut.h
+++ b/yql/essentials/sql/v1/format/sql_format_ut.h
@@ -1856,6 +1856,6 @@ Y_UNIT_TEST(ValueConstructor) {
"SELECT\n\tCallable(Callable<(Int32) -> Int32>, ($x) -> ($x))(0)\n;\n"},
};
- TSetup setup(/* ansiLexer = */ true);
+ TSetup setup;
setup.Run(cases);
}
diff --git a/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp
index 12e80587eac..9e5be312a7f 100644
--- a/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp
+++ b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp
@@ -2,6 +2,12 @@
#include "sql_format.h"
+#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
+
+
#include <google/protobuf/arena.h>
#include <util/string/subst.h>
#include <util/string/join.h>
@@ -12,14 +18,25 @@ using TCases = TVector<std::pair<TString, TString>>;
struct TSetup {
TSetup(bool ansiLexer = false) {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+ lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
+ NSQLTranslationV1::TParsers parsers;
+ parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
+ parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
+
NSQLTranslation::TTranslationSettings settings;
settings.Arena = &Arena;
settings.Antlr4Parser = true;
settings.AnsiLexer = ansiLexer;
- Formatter = NSQLFormat::MakeSqlFormatter(settings);
+ Formatter = NSQLFormat::MakeSqlFormatter(lexers, parsers, settings);
}
void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+ lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
+
for (const auto& c : cases) {
NYql::TIssues issues;
TString formatted;
@@ -35,7 +52,9 @@ struct TSetup {
UNIT_ASSERT_NO_DIFF(formatted, formatted2);
if (mode == NSQLFormat::EFormatMode::Pretty) {
- auto mutatedQuery = NSQLFormat::MutateQuery(c.first);
+ NSQLTranslation::TTranslationSettings settings;
+ settings.Antlr4Parser = true;
+ auto mutatedQuery = NSQLFormat::MutateQuery(lexers, c.first, settings);
auto res3 = Formatter->Format(mutatedQuery, formatted, issues);
UNIT_ASSERT_C(res3, issues.ToString());
}
diff --git a/yql/essentials/sql/v1/format/ut/ya.make b/yql/essentials/sql/v1/format/ut/ya.make
index 4c3ef65f965..690b13b6180 100644
--- a/yql/essentials/sql/v1/format/ut/ya.make
+++ b/yql/essentials/sql/v1/format/ut/ya.make
@@ -4,4 +4,11 @@ SRCS(
sql_format_ut.cpp
)
+PEERDIR(
+ yql/essentials/sql/v1/lexer/antlr3
+ yql/essentials/sql/v1/lexer/antlr3_ansi
+ yql/essentials/sql/v1/proto_parser/antlr3
+ yql/essentials/sql/v1/proto_parser/antlr3_ansi
+)
+
END()
diff --git a/yql/essentials/sql/v1/format/ut_antlr4/ya.make b/yql/essentials/sql/v1/format/ut_antlr4/ya.make
index a0f9d710a8c..13b0f72a949 100644
--- a/yql/essentials/sql/v1/format/ut_antlr4/ya.make
+++ b/yql/essentials/sql/v1/format/ut_antlr4/ya.make
@@ -4,4 +4,12 @@ SRCS(
sql_format_ut_antlr4.cpp
)
+PEERDIR(
+ yql/essentials/sql/v1/lexer/antlr4
+ yql/essentials/sql/v1/lexer/antlr4_ansi
+ yql/essentials/sql/v1/proto_parser/antlr4
+ yql/essentials/sql/v1/proto_parser/antlr4_ansi
+)
+
+
END()