diff options
| author | vityaman <[email protected]> | 2025-03-27 23:28:33 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2025-03-27 23:42:19 +0300 |
| commit | b24ce722d5cf848fcbe6c6f9b6fce9698174d3de (patch) | |
| tree | 5bc1a78180c095f095db112917afd61450f9cf5a /yql/essentials/sql | |
| parent | 92d7e50254d1edaf2b664e13fba7a34d0dbc161f (diff) | |
YQL-19747 Split statements
When the completion engine is run on a multi-statement query in which preceding statements are syntactically incorrect, `antlr4-c3` does not return any candidates. Running the engine on the current statement only is a best-effort way to still provide candidates.
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Depends on https://github.com/ytsaurus/ytsaurus/pull/1127 (`ELexerFlavor`)
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1144
commit_hash:0ced9443a9712191f5420246531f781ca4bc5f42
Diffstat (limited to 'yql/essentials/sql')
| -rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.cpp | 20 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.h | 7 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete_ut.cpp | 50 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/complete/sql_context.cpp | 41 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/complete/sql_context.h | 4 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/complete/ut/ya.make | 5 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/complete/ya.make | 7 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.cpp | 19 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.h | 6 |
9 files changed, 127 insertions, 32 deletions
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index 2a16a250e54..9bba9c5e71e 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -3,6 +3,10 @@ #include "sql_context.h" #include "string_util.h" +// FIXME(YQL-19747): unwanted dependency on a lexer implementation +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h> + #include <util/generic/algorithm.h> #include <util/charset/utf8.h> @@ -10,8 +14,8 @@ namespace NSQLComplete { class TSqlCompletionEngine: public ISqlCompletionEngine { public: - TSqlCompletionEngine() - : ContextInference(MakeSqlContextInference()) + explicit TSqlCompletionEngine(TLexerSupplier lexer) + : ContextInference(MakeSqlContextInference(lexer)) { } @@ -68,8 +72,18 @@ namespace NSQLComplete { ISqlContextInference::TPtr ContextInference; }; + // FIXME(YQL-19747): unwanted dependency on a lexer implementation ISqlCompletionEngine::TPtr MakeSqlCompletionEngine() { - return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine()); + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); + lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory(); + return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) { + return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true); + }); + } + + ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer) { + return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine(lexer)); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h index 99e74cce7a7..354f8ffa756 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.h +++ b/yql/essentials/sql/v1/complete/sql_complete.h @@ -1,5 +1,7 @@ #pragma once +#include 
<yql/essentials/sql/v1/lexer/lexer.h> + #include <util/generic/string.h> #include <util/generic/vector.h> @@ -39,6 +41,11 @@ namespace NSQLComplete { virtual ~ISqlCompletionEngine() = default; }; + using TLexerSupplier = std::function<NSQLTranslation::ILexer::TPtr(bool ansi)>; + + // FIXME(YQL-19747): unwanted dependency on a lexer implementation ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(); + ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer); + } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index e0a012f9f6e..c65eba0e2d4 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -1,5 +1,8 @@ #include "sql_complete.h" +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> +#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h> + #include <library/cpp/testing/unittest/registar.h> using namespace NSQLComplete; @@ -7,6 +10,15 @@ using namespace NSQLComplete; Y_UNIT_TEST_SUITE(SqlCompleteTests) { using ECandidateKind::Keyword; + ISqlCompletionEngine::TPtr MakeSqlCompletionEngineUT() { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); + lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory(); + return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) { + return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true); + }); + } + TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) { return engine->Complete({prefix}).Candidates; } @@ -50,7 +62,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VALUES"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); 
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); @@ -76,7 +88,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "USER"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected); } @@ -99,7 +111,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VIEW"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected); } @@ -108,7 +120,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "FROM"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected); } @@ -128,7 +140,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VIEW"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected); } @@ -171,7 +183,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VALUES"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected); } @@ -196,7 +208,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "USE"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected); } @@ -206,7 +218,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "OR"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected); } @@ -227,7 +239,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VARIANT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA "}), expected); } @@ -265,7 +277,7 @@ 
Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "VARIANT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected); } @@ -275,18 +287,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "OBJECT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected); } Y_UNIT_TEST(UTF8Wide) { - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0); } Y_UNIT_TEST(WordBreak) { - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT ("}).size(), 28); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT (1)"}).size(), 30); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT 1;"}).size(), 35); @@ -300,7 +312,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { " Bool(field), Math::Sin(var) \n" "FROM `local/test/space/table` JOIN test;"); - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); for (std::size_t size = 0; size <= queryUtf16.size(); ++size) { const TWtringBuf prefixUtf16(queryUtf16, 0, size); @@ -314,10 +326,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "SELECT"}, }; - auto engine = MakeSqlCompletionEngine(); + auto engine = MakeSqlCompletionEngineUT(); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected); UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected); } + + Y_UNIT_TEST(InvalidStatementsRecovery) { + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select; ").size(), 35); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, 
"select select;").size(), 35); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "!;").size(), 0, "Lexer failing"); + } + } // Y_UNIT_TEST_SUITE(SqlCompleteTests) diff --git a/yql/essentials/sql/v1/complete/sql_context.cpp b/yql/essentials/sql/v1/complete/sql_context.cpp index 4195daa6d83..2bd1a2af987 100644 --- a/yql/essentials/sql/v1/complete/sql_context.cpp +++ b/yql/essentials/sql/v1/complete/sql_context.cpp @@ -3,6 +3,7 @@ #include "c3_engine.h" #include "sql_syntax.h" +#include <yql/essentials/core/issue/yql_issue.h> #include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> #include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h> #include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> @@ -30,14 +31,19 @@ namespace NSQLComplete { TDefaultYQLGrammar>; public: - TSpecializedSqlContextInference() + explicit TSpecializedSqlContextInference(TLexerSupplier lexer) : Grammar(&GetSqlGrammar(IsAnsiLexer)) + , Lexer_(lexer(/* ansi = */ IsAnsiLexer)) , C3(ComputeC3Config()) { } TCompletionContext Analyze(TCompletionInput input) override { - auto prefix = input.Text.Head(input.CursorPosition); + TStringBuf prefix; + if (!GetC3Prefix(input, &prefix)) { + return {}; + } + auto tokens = C3.Complete(prefix); return { .Keywords = SiftedKeywords(tokens), @@ -71,6 +77,26 @@ namespace NSQLComplete { return preferredRules; } + bool GetC3Prefix(TCompletionInput input, TStringBuf* prefix) { + *prefix = input.Text.Head(input.CursorPosition); + + TVector<TString> statements; + NYql::TIssues issues; + if (!NSQLTranslationV1::SplitQueryToStatements( + TString(*prefix) + (prefix->EndsWith(';') ? 
";" : ""), Lexer_, + statements, issues, /* file = */ "", + /* areBlankSkipped = */ false)) { + return false; + } + + if (statements.empty()) { + return true; + } + + *prefix = prefix->Last(statements.back().size()); + return true; + } + TVector<TString> SiftedKeywords(const TVector<TSuggestedToken>& tokens) { const auto& vocabulary = Grammar->GetVocabulary(); const auto& keywordTokens = Grammar->GetKeywordTokens(); @@ -85,11 +111,18 @@ namespace NSQLComplete { } const ISqlGrammar* Grammar; + NSQLTranslation::ILexer::TPtr Lexer_; TC3Engine<G> C3; }; class TSqlContextInference: public ISqlContextInference { public: + explicit TSqlContextInference(TLexerSupplier lexer) + : DefaultEngine(lexer) + , AnsiEngine(lexer) + { + } + TCompletionContext Analyze(TCompletionInput input) override { auto isAnsiLexer = IsAnsiQuery(TString(input.Text)); auto& engine = GetSpecializedEngine(isAnsiLexer); @@ -108,8 +141,8 @@ namespace NSQLComplete { TSpecializedSqlContextInference</* IsAnsiLexer = */ true> AnsiEngine; }; - ISqlContextInference::TPtr MakeSqlContextInference() { - return TSqlContextInference::TPtr(new TSqlContextInference()); + ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer) { + return TSqlContextInference::TPtr(new TSqlContextInference(lexer)); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_context.h b/yql/essentials/sql/v1/complete/sql_context.h index bc3b8d4840f..72d481ca9c4 100644 --- a/yql/essentials/sql/v1/complete/sql_context.h +++ b/yql/essentials/sql/v1/complete/sql_context.h @@ -2,6 +2,8 @@ #include "sql_complete.h" +#include <yql/essentials/sql/v1/lexer/lexer.h> + #include <util/generic/string.h> namespace NSQLComplete { @@ -18,6 +20,6 @@ namespace NSQLComplete { virtual ~ISqlContextInference() = default; }; - ISqlContextInference::TPtr MakeSqlContextInference(); + ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer); } // namespace NSQLComplete diff --git 
a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make index 91f7da13612..07e60d5a508 100644 --- a/yql/essentials/sql/v1/complete/ut/ya.make +++ b/yql/essentials/sql/v1/complete/ut/ya.make @@ -5,4 +5,9 @@ SRCS( string_util_ut.cpp ) +PEERDIR( + yql/essentials/sql/v1/lexer/antlr4_pure + yql/essentials/sql/v1/lexer/antlr4_pure_ansi +) + END() diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make index 70189e5f508..7142e57899c 100644 --- a/yql/essentials/sql/v1/complete/ya.make +++ b/yql/essentials/sql/v1/complete/ya.make @@ -13,6 +13,13 @@ PEERDIR( contrib/libs/antlr4-c3 yql/essentials/sql/settings yql/essentials/sql/v1/format + yql/essentials/sql/v1/lexer + + # FIXME(YQL-19747): unwanted dependency on a lexer implementation + yql/essentials/sql/v1/lexer/antlr4_pure + yql/essentials/sql/v1/lexer/antlr4_pure_ansi + + yql/essentials/core/issue yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4 yql/essentials/parser/antlr_ast/gen/v1_antlr4 ) diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp index 2b5da9ddd53..5621cc65d7b 100644 --- a/yql/essentials/sql/v1/lexer/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -253,7 +253,10 @@ void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenI } -bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, TVector<TString>& statements, NYql::TIssues& issues, const TString& file) { +bool SplitQueryToStatements( + const TString& query, NSQLTranslation::ILexer::TPtr& lexer, + TVector<TString>& statements, NYql::TIssues& issues, const TString& file, + bool areBlankSkipped) { TParsedTokenList allTokens; auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { if (token.Name != "EOF") { @@ -269,12 +272,14 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& SplitByStatements(allTokens.begin(), allTokens.end(), statementsTokens); 
for (size_t i = 1; i < statementsTokens.size(); ++i) { - TStringBuilder currentQueryBuilder; + TString statement; for (auto it = statementsTokens[i - 1]; it != statementsTokens[i]; ++it) { - currentQueryBuilder << it->Content; + statement += it->Content; + } + + if (areBlankSkipped) { + statement = StripStringLeft(statement); } - TString statement = currentQueryBuilder; - statement = StripStringLeft(statement); bool isBlank = true; for (auto c : statement) { @@ -284,11 +289,11 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& } }; - if (isBlank) { + if (isBlank && areBlankSkipped) { continue; } - statements.push_back(statement); + statements.emplace_back(std::move(statement)); } return true; diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h index 857681ae51f..1cc8566fcf6 100644 --- a/yql/essentials/sql/v1/lexer/lexer.h +++ b/yql/essentials/sql/v1/lexer/lexer.h @@ -21,6 +21,8 @@ NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool a // in SELECT * FROM ... GROUP BY ... - group is a keyword. bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token); -bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, - TVector<TString>& statements, NYql::TIssues& issues, const TString& file = ""); +bool SplitQueryToStatements( + const TString& query, NSQLTranslation::ILexer::TPtr& lexer, + TVector<TString>& statements, NYql::TIssues& issues, const TString& file = "", + bool areBlankSkipped = true); } |
