summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql
diff options
context:
space:
mode:
author vityaman <[email protected]> 2025-03-27 23:28:33 +0300
committer robot-piglet <[email protected]> 2025-03-27 23:42:19 +0300
commit b24ce722d5cf848fcbe6c6f9b6fce9698174d3de (patch)
tree 5bc1a78180c095f095db112917afd61450f9cf5a /yql/essentials/sql
parent 92d7e50254d1edaf2b664e13fba7a34d0dbc161f (diff)
YQL-19747 Split statements
When the completion engine is run on a multi-statement query in which preceding statements are syntactically incorrect, `antlr4-c3` does not return any candidates. Running the engine only on the current statement is a best-effort way to still provide candidates.

- Related to https://github.com/ydb-platform/ydb/issues/9056
- Depends on https://github.com/ytsaurus/ytsaurus/pull/1127 (`ELexerFlavor`)

---

Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1144
commit_hash:0ced9443a9712191f5420246531f781ca4bc5f42
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete.cpp 20
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete.h 7
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete_ut.cpp 50
-rw-r--r-- yql/essentials/sql/v1/complete/sql_context.cpp 41
-rw-r--r-- yql/essentials/sql/v1/complete/sql_context.h 4
-rw-r--r-- yql/essentials/sql/v1/complete/ut/ya.make 5
-rw-r--r-- yql/essentials/sql/v1/complete/ya.make 7
-rw-r--r-- yql/essentials/sql/v1/lexer/lexer.cpp 19
-rw-r--r-- yql/essentials/sql/v1/lexer/lexer.h 6
9 files changed, 127 insertions, 32 deletions
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp
index 2a16a250e54..9bba9c5e71e 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete.cpp
@@ -3,6 +3,10 @@
#include "sql_context.h"
#include "string_util.h"
+// FIXME(YQL-19747): unwanted dependency on a lexer implementation
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
+
#include <util/generic/algorithm.h>
#include <util/charset/utf8.h>
@@ -10,8 +14,8 @@ namespace NSQLComplete {
class TSqlCompletionEngine: public ISqlCompletionEngine {
public:
- TSqlCompletionEngine()
- : ContextInference(MakeSqlContextInference())
+ explicit TSqlCompletionEngine(TLexerSupplier lexer)
+ : ContextInference(MakeSqlContextInference(lexer))
{
}
@@ -68,8 +72,18 @@ namespace NSQLComplete {
ISqlContextInference::TPtr ContextInference;
};
+ // FIXME(YQL-19747): unwanted dependency on a lexer implementation
ISqlCompletionEngine::TPtr MakeSqlCompletionEngine() {
- return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine());
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+ lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory();
+ return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) {
+ return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true);
+ });
+ }
+
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer) {
+ return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine(lexer));
}
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h
index 99e74cce7a7..354f8ffa756 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.h
+++ b/yql/essentials/sql/v1/complete/sql_complete.h
@@ -1,5 +1,7 @@
#pragma once
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
#include <util/generic/string.h>
#include <util/generic/vector.h>
@@ -39,6 +41,11 @@ namespace NSQLComplete {
virtual ~ISqlCompletionEngine() = default;
};
+ using TLexerSupplier = std::function<NSQLTranslation::ILexer::TPtr(bool ansi)>;
+
+ // FIXME(YQL-19747): unwanted dependency on a lexer implementation
ISqlCompletionEngine::TPtr MakeSqlCompletionEngine();
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(TLexerSupplier lexer);
+
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
index e0a012f9f6e..c65eba0e2d4 100644
--- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
@@ -1,5 +1,8 @@
#include "sql_complete.h"
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
+
#include <library/cpp/testing/unittest/registar.h>
using namespace NSQLComplete;
@@ -7,6 +10,15 @@ using namespace NSQLComplete;
Y_UNIT_TEST_SUITE(SqlCompleteTests) {
using ECandidateKind::Keyword;
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngineUT() {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+ lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory();
+ return MakeSqlCompletionEngine([lexers = std::move(lexers)](bool ansi) {
+ return NSQLTranslationV1::MakeLexer(lexers, ansi, /* antlr4 = */ true, /* pure = */ true);
+ });
+ }
+
TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) {
return engine->Complete({prefix}).Candidates;
}
@@ -50,7 +62,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VALUES"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected);
@@ -76,7 +88,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "USER"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected);
}
@@ -99,7 +111,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VIEW"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected);
}
@@ -108,7 +120,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "FROM"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected);
}
@@ -128,7 +140,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VIEW"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected);
}
@@ -171,7 +183,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VALUES"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected);
}
@@ -196,7 +208,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "USE"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected);
}
@@ -206,7 +218,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "OR"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected);
}
@@ -227,7 +239,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VARIANT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA "}), expected);
}
@@ -265,7 +277,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "VARIANT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected);
}
@@ -275,18 +287,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "OBJECT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected);
}
Y_UNIT_TEST(UTF8Wide) {
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0);
}
Y_UNIT_TEST(WordBreak) {
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT ("}).size(), 28);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT (1)"}).size(), 30);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT 1;"}).size(), 35);
@@ -300,7 +312,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
" Bool(field), Math::Sin(var) \n"
"FROM `local/test/space/table` JOIN test;");
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
for (std::size_t size = 0; size <= queryUtf16.size(); ++size) {
const TWtringBuf prefixUtf16(queryUtf16, 0, size);
@@ -314,10 +326,18 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "SELECT"},
};
- auto engine = MakeSqlCompletionEngine();
+ auto engine = MakeSqlCompletionEngineUT();
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected);
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected);
}
+
+ Y_UNIT_TEST(InvalidStatementsRecovery) {
+ auto engine = MakeSqlCompletionEngineUT();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select; ").size(), 35);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "select select;").size(), 35);
+ UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "!;").size(), 0, "Lexer failing");
+ }
+
} // Y_UNIT_TEST_SUITE(SqlCompleteTests)
diff --git a/yql/essentials/sql/v1/complete/sql_context.cpp b/yql/essentials/sql/v1/complete/sql_context.cpp
index 4195daa6d83..2bd1a2af987 100644
--- a/yql/essentials/sql/v1/complete/sql_context.cpp
+++ b/yql/essentials/sql/v1/complete/sql_context.cpp
@@ -3,6 +3,7 @@
#include "c3_engine.h"
#include "sql_syntax.h"
+#include <yql/essentials/core/issue/yql_issue.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
@@ -30,14 +31,19 @@ namespace NSQLComplete {
TDefaultYQLGrammar>;
public:
- TSpecializedSqlContextInference()
+ explicit TSpecializedSqlContextInference(TLexerSupplier lexer)
: Grammar(&GetSqlGrammar(IsAnsiLexer))
+ , Lexer_(lexer(/* ansi = */ IsAnsiLexer))
, C3(ComputeC3Config())
{
}
TCompletionContext Analyze(TCompletionInput input) override {
- auto prefix = input.Text.Head(input.CursorPosition);
+ TStringBuf prefix;
+ if (!GetC3Prefix(input, &prefix)) {
+ return {};
+ }
+
auto tokens = C3.Complete(prefix);
return {
.Keywords = SiftedKeywords(tokens),
@@ -71,6 +77,26 @@ namespace NSQLComplete {
return preferredRules;
}
+ bool GetC3Prefix(TCompletionInput input, TStringBuf* prefix) {
+ *prefix = input.Text.Head(input.CursorPosition);
+
+ TVector<TString> statements;
+ NYql::TIssues issues;
+ if (!NSQLTranslationV1::SplitQueryToStatements(
+ TString(*prefix) + (prefix->EndsWith(';') ? ";" : ""), Lexer_,
+ statements, issues, /* file = */ "",
+ /* areBlankSkipped = */ false)) {
+ return false;
+ }
+
+ if (statements.empty()) {
+ return true;
+ }
+
+ *prefix = prefix->Last(statements.back().size());
+ return true;
+ }
+
TVector<TString> SiftedKeywords(const TVector<TSuggestedToken>& tokens) {
const auto& vocabulary = Grammar->GetVocabulary();
const auto& keywordTokens = Grammar->GetKeywordTokens();
@@ -85,11 +111,18 @@ namespace NSQLComplete {
}
const ISqlGrammar* Grammar;
+ NSQLTranslation::ILexer::TPtr Lexer_;
TC3Engine<G> C3;
};
class TSqlContextInference: public ISqlContextInference {
public:
+ explicit TSqlContextInference(TLexerSupplier lexer)
+ : DefaultEngine(lexer)
+ , AnsiEngine(lexer)
+ {
+ }
+
TCompletionContext Analyze(TCompletionInput input) override {
auto isAnsiLexer = IsAnsiQuery(TString(input.Text));
auto& engine = GetSpecializedEngine(isAnsiLexer);
@@ -108,8 +141,8 @@ namespace NSQLComplete {
TSpecializedSqlContextInference</* IsAnsiLexer = */ true> AnsiEngine;
};
- ISqlContextInference::TPtr MakeSqlContextInference() {
- return TSqlContextInference::TPtr(new TSqlContextInference());
+ ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer) {
+ return TSqlContextInference::TPtr(new TSqlContextInference(lexer));
}
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_context.h b/yql/essentials/sql/v1/complete/sql_context.h
index bc3b8d4840f..72d481ca9c4 100644
--- a/yql/essentials/sql/v1/complete/sql_context.h
+++ b/yql/essentials/sql/v1/complete/sql_context.h
@@ -2,6 +2,8 @@
#include "sql_complete.h"
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
#include <util/generic/string.h>
namespace NSQLComplete {
@@ -18,6 +20,6 @@ namespace NSQLComplete {
virtual ~ISqlContextInference() = default;
};
- ISqlContextInference::TPtr MakeSqlContextInference();
+ ISqlContextInference::TPtr MakeSqlContextInference(TLexerSupplier lexer);
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make
index 91f7da13612..07e60d5a508 100644
--- a/yql/essentials/sql/v1/complete/ut/ya.make
+++ b/yql/essentials/sql/v1/complete/ut/ya.make
@@ -5,4 +5,9 @@ SRCS(
string_util_ut.cpp
)
+PEERDIR(
+ yql/essentials/sql/v1/lexer/antlr4_pure
+ yql/essentials/sql/v1/lexer/antlr4_pure_ansi
+)
+
END()
diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make
index 70189e5f508..7142e57899c 100644
--- a/yql/essentials/sql/v1/complete/ya.make
+++ b/yql/essentials/sql/v1/complete/ya.make
@@ -13,6 +13,13 @@ PEERDIR(
contrib/libs/antlr4-c3
yql/essentials/sql/settings
yql/essentials/sql/v1/format
+ yql/essentials/sql/v1/lexer
+
+ # FIXME(YQL-19747): unwanted dependency on a lexer implementation
+ yql/essentials/sql/v1/lexer/antlr4_pure
+ yql/essentials/sql/v1/lexer/antlr4_pure_ansi
+
+ yql/essentials/core/issue
yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4
yql/essentials/parser/antlr_ast/gen/v1_antlr4
)
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
index 2b5da9ddd53..5621cc65d7b 100644
--- a/yql/essentials/sql/v1/lexer/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -253,7 +253,10 @@ void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenI
}
-bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, TVector<TString>& statements, NYql::TIssues& issues, const TString& file) {
+bool SplitQueryToStatements(
+ const TString& query, NSQLTranslation::ILexer::TPtr& lexer,
+ TVector<TString>& statements, NYql::TIssues& issues, const TString& file,
+ bool areBlankSkipped) {
TParsedTokenList allTokens;
auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
if (token.Name != "EOF") {
@@ -269,12 +272,14 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr&
SplitByStatements(allTokens.begin(), allTokens.end(), statementsTokens);
for (size_t i = 1; i < statementsTokens.size(); ++i) {
- TStringBuilder currentQueryBuilder;
+ TString statement;
for (auto it = statementsTokens[i - 1]; it != statementsTokens[i]; ++it) {
- currentQueryBuilder << it->Content;
+ statement += it->Content;
+ }
+
+ if (areBlankSkipped) {
+ statement = StripStringLeft(statement);
}
- TString statement = currentQueryBuilder;
- statement = StripStringLeft(statement);
bool isBlank = true;
for (auto c : statement) {
@@ -284,11 +289,11 @@ bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr&
}
};
- if (isBlank) {
+ if (isBlank && areBlankSkipped) {
continue;
}
- statements.push_back(statement);
+ statements.emplace_back(std::move(statement));
}
return true;
diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h
index 857681ae51f..1cc8566fcf6 100644
--- a/yql/essentials/sql/v1/lexer/lexer.h
+++ b/yql/essentials/sql/v1/lexer/lexer.h
@@ -21,6 +21,8 @@ NSQLTranslation::ILexer::TPtr MakeLexer(const TLexers& lexers, bool ansi, bool a
// in SELECT * FROM ... GROUP BY ... - group is a keyword.
bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token);
-bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer,
- TVector<TString>& statements, NYql::TIssues& issues, const TString& file = "");
+bool SplitQueryToStatements(
+ const TString& query, NSQLTranslation::ILexer::TPtr& lexer,
+ TVector<TString>& statements, NYql::TIssues& issues, const TString& file = "",
+ bool areBlankSkipped = true);
}