summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/complete
diff options
context:
space:
mode:
authorvitya-smirnov <[email protected]>2025-06-17 16:02:41 +0300
committervitya-smirnov <[email protected]>2025-06-17 16:18:59 +0300
commit0bc6f0197fa813fec678c75c760564724f591309 (patch)
treebb40ca81a21ddb608154e56967202c0548708551 /yql/essentials/sql/v1/complete
parentd84e9cf123d8345dc008b9b0feafbb20be9d9784 (diff)
YQL-19747: Support token ignoring patterns
Added support for token filtering. It is needed because the YQL grammar serves multiple systems with different capabilities. For example, some of them do not support `INTERSECT ALL`, and therefore `ALL` after `INTERSECT` should be *disabled*. Another example is a system that supports only `EVALUATE FOR`, so `FOR` is *forced* to be preceded by `EVALUATE`. commit_hash:a926c8d3911f8dd025ab58eb7f81e2370fcd9376
Diffstat (limited to 'yql/essentials/sql/v1/complete')
-rw-r--r--yql/essentials/sql/v1/complete/analysis/global/global.h1
-rw-r--r--yql/essentials/sql/v1/complete/analysis/local/local.cpp49
-rw-r--r--yql/essentials/sql/v1/complete/analysis/local/local.h6
-rw-r--r--yql/essentials/sql/v1/complete/antlr4/c3i.h2
-rw-r--r--yql/essentials/sql/v1/complete/antlr4/c3t.h51
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete.cpp32
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete.h16
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete_ut.cpp18
-rw-r--r--yql/essentials/sql/v1/complete/syntax/grammar.cpp6
-rw-r--r--yql/essentials/sql/v1/complete/syntax/grammar.h1
10 files changed, 161 insertions, 21 deletions
diff --git a/yql/essentials/sql/v1/complete/analysis/global/global.h b/yql/essentials/sql/v1/complete/analysis/global/global.h
index fe929bf77c7..1ef1344e3c9 100644
--- a/yql/essentials/sql/v1/complete/analysis/global/global.h
+++ b/yql/essentials/sql/v1/complete/analysis/global/global.h
@@ -29,6 +29,7 @@ namespace NSQLComplete {
TMaybe<TColumnContext> Column;
};
+ // TODO(YQL-19747): Make it thread-safe to make ISqlCompletionEngine thread-safe.
class IGlobalAnalysis {
public:
using TPtr = THolder<IGlobalAnalysis>;
diff --git a/yql/essentials/sql/v1/complete/analysis/local/local.cpp b/yql/essentials/sql/v1/complete/analysis/local/local.cpp
index 1b732ae5db7..13e2fce69ad 100644
--- a/yql/essentials/sql/v1/complete/analysis/local/local.cpp
+++ b/yql/essentials/sql/v1/complete/analysis/local/local.cpp
@@ -51,10 +51,13 @@ namespace NSQLComplete {
public:
TSpecializedLocalSyntaxAnalysis(
- TLexerSupplier lexer, const THashSet<TString>& IgnoredRules)
+ TLexerSupplier lexer,
+ const THashSet<TString>& ignoredRules,
+ const THashMap<TString, THashSet<TString>>& disabledPreviousByToken,
+ const THashMap<TString, THashSet<TString>>& forcedPreviousByToken)
: Grammar_(&GetSqlGrammar())
, Lexer_(lexer(/* ansi = */ IsAnsiLexer))
- , C3_(ComputeC3Config(IgnoredRules))
+ , C3_(ComputeC3Config(ignoredRules, disabledPreviousByToken, forcedPreviousByToken))
{
}
@@ -111,11 +114,16 @@ namespace NSQLComplete {
}
private:
- IC3Engine::TConfig ComputeC3Config(const THashSet<TString>& IgnoredRules) const {
+ IC3Engine::TConfig ComputeC3Config(
+ const THashSet<TString>& ignoredRules,
+ const THashMap<TString, THashSet<TString>>& disabledPreviousByToken,
+ const THashMap<TString, THashSet<TString>>& forcedPreviousByToken) const {
return {
.IgnoredTokens = ComputeIgnoredTokens(),
.PreferredRules = ComputePreferredRules(),
- .IgnoredRules = ComputeIgnoredRules(IgnoredRules),
+ .IgnoredRules = ComputeIgnoredRules(ignoredRules),
+ .DisabledPreviousByToken = Resolved(disabledPreviousByToken),
+ .ForcedPreviousByToken = Resolved(forcedPreviousByToken),
};
}
@@ -143,6 +151,23 @@ namespace NSQLComplete {
return ignored;
}
+ std::unordered_map<TTokenId, std::unordered_set<TTokenId>>
+ Resolved(const THashMap<TString, THashSet<TString>>& tokens) const {
+ std::unordered_map<TTokenId, std::unordered_set<TTokenId>> resolved;
+ for (const auto& [name, set] : tokens) {
+ resolved[Grammar_->GetTokenId(name)] = Resolved(set);
+ }
+ return resolved;
+ }
+
+ std::unordered_set<TTokenId> Resolved(const THashSet<TString>& tokens) const {
+ std::unordered_set<TTokenId> resolved;
+ for (const TString& name : tokens) {
+ resolved.emplace(Grammar_->GetTokenId(name));
+ }
+ return resolved;
+ }
+
TC3Candidates C3Complete(TCompletionInput statement, const TCursorTokenContext& context) {
auto enclosing = context.Enclosing();
@@ -336,9 +361,12 @@ namespace NSQLComplete {
class TLocalSyntaxAnalysis: public ILocalSyntaxAnalysis {
public:
TLocalSyntaxAnalysis(
- TLexerSupplier lexer, const THashSet<TString>& IgnoredRules)
- : DefaultEngine_(lexer, IgnoredRules)
- , AnsiEngine_(lexer, IgnoredRules)
+ TLexerSupplier lexer,
+ const THashSet<TString>& ignoredRules,
+ const THashMap<TString, THashSet<TString>>& disabledPreviousByToken,
+ const THashMap<TString, THashSet<TString>>& forcedPreviousByToken)
+ : DefaultEngine_(lexer, ignoredRules, disabledPreviousByToken, forcedPreviousByToken)
+ , AnsiEngine_(lexer, ignoredRules, disabledPreviousByToken, forcedPreviousByToken)
{
}
@@ -361,8 +389,11 @@ namespace NSQLComplete {
};
ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis(
- TLexerSupplier lexer, const THashSet<TString>& IgnoredRules) {
- return MakeHolder<TLocalSyntaxAnalysis>(lexer, IgnoredRules);
+ TLexerSupplier lexer,
+ const THashSet<TString>& ignoredRules,
+ const THashMap<TString, THashSet<TString>>& disabledPreviousByToken,
+ const THashMap<TString, THashSet<TString>>& forcedPreviousByToken) {
+ return MakeHolder<TLocalSyntaxAnalysis>(lexer, ignoredRules, disabledPreviousByToken, forcedPreviousByToken);
}
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/analysis/local/local.h b/yql/essentials/sql/v1/complete/analysis/local/local.h
index 6cf6fc33c51..cca182aacdb 100644
--- a/yql/essentials/sql/v1/complete/analysis/local/local.h
+++ b/yql/essentials/sql/v1/complete/analysis/local/local.h
@@ -60,6 +60,7 @@ namespace NSQLComplete {
TEditRange EditRange;
};
+ // TODO(YQL-19747): Make it thread-safe to make ISqlCompletionEngine thread-safe.
class ILocalSyntaxAnalysis {
public:
using TPtr = THolder<ILocalSyntaxAnalysis>;
@@ -69,6 +70,9 @@ namespace NSQLComplete {
};
ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis(
- TLexerSupplier lexer, const THashSet<TString>& IgnoredRules);
+ TLexerSupplier lexer,
+ const THashSet<TString>& ignoredRules,
+ const THashMap<TString, THashSet<TString>>& disabledPreviousByToken,
+ const THashMap<TString, THashSet<TString>>& forcedPreviousByToken);
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/antlr4/c3i.h b/yql/essentials/sql/v1/complete/antlr4/c3i.h
index 52920882ee0..b3fcaab7ae0 100644
--- a/yql/essentials/sql/v1/complete/antlr4/c3i.h
+++ b/yql/essentials/sql/v1/complete/antlr4/c3i.h
@@ -37,6 +37,8 @@ namespace NSQLComplete {
std::unordered_set<TTokenId> IgnoredTokens;
std::unordered_set<TRuleId> PreferredRules;
std::unordered_set<TRuleId> IgnoredRules;
+ std::unordered_map<TTokenId, std::unordered_set<TTokenId>> DisabledPreviousByToken;
+ std::unordered_map<TTokenId, std::unordered_set<TTokenId>> ForcedPreviousByToken;
};
virtual ~IC3Engine() = default;
diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h
index 2b49068ac8f..d1c80867a59 100644
--- a/yql/essentials/sql/v1/complete/antlr4/c3t.h
+++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h
@@ -13,6 +13,7 @@
#include <util/generic/fwd.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
namespace NSQLComplete {
@@ -26,6 +27,8 @@ namespace NSQLComplete {
, Parser_(&Tokens_)
, CompletionCore_(&Parser_)
, IgnoredRules_(std::move(config.IgnoredRules))
+ , DisabledPreviousByToken_(std::move(config.DisabledPreviousByToken))
+ , ForcedPreviousByToken_(std::move(config.ForcedPreviousByToken))
{
Lexer_.removeErrorListeners();
Parser_.removeErrorListeners();
@@ -36,15 +39,29 @@ namespace NSQLComplete {
for (TRuleId rule : IgnoredRules_) {
CompletionCore_.preferredRules.emplace(rule);
}
+
+ PurifyForcedTokens();
}
TC3Candidates Complete(TStringBuf text, size_t caretTokenIndex) override {
Assign(text);
auto candidates = CompletionCore_.collectCandidates(caretTokenIndex);
- return Converted(std::move(candidates));
+ return Converted(std::move(candidates), caretTokenIndex);
}
private:
+ void PurifyForcedTokens() {
+ for (auto it = ForcedPreviousByToken_.begin(); it != ForcedPreviousByToken_.end();) {
+ const auto& [token, previous] = *it;
+ if (previous.empty()) {
+ CompletionCore_.ignoredTokens.emplace(token);
+ it = ForcedPreviousByToken_.erase(it);
+ } else {
+ it = std::next(it);
+ }
+ }
+ }
+
void Assign(TStringBuf prefix) {
Chars_.load(prefix.Data(), prefix.Size(), /* lenient = */ false);
Lexer_.reset();
@@ -52,10 +69,14 @@ namespace NSQLComplete {
Tokens_.fill();
}
- TC3Candidates Converted(c3::CandidatesCollection candidates) const {
+ TC3Candidates Converted(c3::CandidatesCollection candidates, size_t caretTokenIndex) {
TC3Candidates converted;
for (auto& [token, following] : candidates.tokens) {
+ if (IsIgnored(token, caretTokenIndex)) {
+ continue;
+ }
+
converted.Tokens.emplace_back(token, std::move(following));
}
@@ -71,17 +92,43 @@ namespace NSQLComplete {
return converted;
}
+ bool IsIgnored(TTokenId token, size_t caretTokenIndex) {
+ auto previous = PreviousToken(caretTokenIndex);
+
+ auto disabled = DisabledPreviousByToken_.find(token);
+ auto forced = ForcedPreviousByToken_.find(token);
+
+ return (disabled != DisabledPreviousByToken_.end() && disabled->second.contains(previous)) ||
+ (forced != ForcedPreviousByToken_.end() && !forced->second.contains(previous));
+ }
+
bool IsIgnored(TRuleId head, const std::vector<TRuleId> tail) const {
return IgnoredRules_.contains(head) ||
AnyOf(tail, [this](TRuleId r) { return IgnoredRules_.contains(r); });
}
+ TTokenId PreviousToken(size_t caretTokenIndex) {
+ ssize_t index = static_cast<ssize_t>(caretTokenIndex) - 1;
+ while (0 <= index && Tokens_.get(index)->getChannel() == antlr4::Token::HIDDEN_CHANNEL) {
+ --index;
+ }
+
+ if (index < 0) {
+ return antlr4::Token::INVALID_TYPE;
+ }
+
+ return Tokens_.get(index)->getType();
+ }
+
antlr4::ANTLRInputStream Chars_;
G::TLexer Lexer_;
antlr4::BufferedTokenStream Tokens_;
G::TParser Parser_;
c3::CodeCompletionCore CompletionCore_;
+
std::unordered_set<TRuleId> IgnoredRules_;
+ std::unordered_map<TTokenId, std::unordered_set<TTokenId>> DisabledPreviousByToken_;
+ std::unordered_map<TTokenId, std::unordered_set<TTokenId>> ForcedPreviousByToken_;
};
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp
index 2f61b1fe4df..2ed153f3ecc 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete.cpp
@@ -22,7 +22,11 @@ namespace NSQLComplete {
INameService::TPtr names,
ISqlCompletionEngine::TConfiguration configuration)
: Configuration_(std::move(configuration))
- , SyntaxAnalysis_(MakeLocalSyntaxAnalysis(lexer, Configuration_.IgnoredRules))
+ , SyntaxAnalysis_(MakeLocalSyntaxAnalysis(
+ lexer,
+ Configuration_.IgnoredRules,
+ Configuration_.DisabledPreviousByToken,
+ Configuration_.ForcedPreviousByToken))
, GlobalAnalysis_(MakeGlobalAnalysis())
, Names_(std::move(names))
{
@@ -272,17 +276,17 @@ namespace NSQLComplete {
}
ISqlCompletionEngine::TConfiguration MakeYDBConfiguration() {
- return {
- .IgnoredRules = {
- "use_stmt",
- "import_stmt",
- "export_stmt",
- },
+ ISqlCompletionEngine::TConfiguration config;
+ config.IgnoredRules = {
+ "use_stmt",
+ "import_stmt",
+ "export_stmt",
};
+ return config;
}
ISqlCompletionEngine::TConfiguration MakeYQLConfiguration() {
- return MakeConfiguration(/* allowedStmts = */ {
+ auto config = MakeConfiguration(/* allowedStmts = */ {
"lambda_stmt",
"pragma_stmt",
"select_stmt",
@@ -300,6 +304,18 @@ namespace NSQLComplete {
"for_stmt",
"values_stmt",
});
+
+ config.DisabledPreviousByToken = {};
+
+ config.ForcedPreviousByToken = {
+ {"PARALLEL", {}},
+ {"TABLESTORE", {}},
+ {"FOR", {"EVALUATE"}},
+ {"IF", {"EVALUATE"}},
+ {"EXTERNAL", {"USING"}},
+ };
+
+ return config;
}
ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(
diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h
index 33ca0c00ac9..90edceacbc4 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.h
+++ b/yql/essentials/sql/v1/complete/sql_complete.h
@@ -9,13 +9,14 @@
#include <util/generic/string.h>
#include <util/generic/vector.h>
+#include <util/generic/hash.h>
#include <util/generic/hash_set.h>
namespace NSQLComplete {
struct TCompletedToken {
TStringBuf Content;
- size_t SourcePosition;
+ size_t SourcePosition = 0;
};
enum class ECandidateKind {
@@ -45,18 +46,31 @@ namespace NSQLComplete {
TVector<TCandidate> Candidates;
};
+ // TODO(YQL-19747): Make it thread-safe.
class ISqlCompletionEngine {
public:
using TPtr = THolder<ISqlCompletionEngine>;
struct TConfiguration {
+ friend class TSqlCompletionEngine;
+ friend ISqlCompletionEngine::TConfiguration MakeYDBConfiguration();
+ friend ISqlCompletionEngine::TConfiguration MakeYQLConfiguration();
+ friend ISqlCompletionEngine::TConfiguration MakeConfiguration(THashSet<TString> allowedStmts);
+
+ public:
size_t Limit = 256;
+
+ private:
THashSet<TString> IgnoredRules;
+ THashMap<TString, THashSet<TString>> DisabledPreviousByToken;
+ THashMap<TString, THashSet<TString>> ForcedPreviousByToken;
};
virtual ~ISqlCompletionEngine() = default;
+
virtual TCompletion
Complete(TCompletionInput input, TEnvironment env = {}) = 0;
+
virtual NThreading::TFuture<TCompletion> // TODO(YQL-19747): Migrate YDB CLI to `Complete` method
CompleteAsync(TCompletionInput input, TEnvironment env = {}) = 0;
};
diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
index d37d34be06b..1f29751d017 100644
--- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
@@ -1319,6 +1319,24 @@ JOIN yt:$cluster_name.test;
UNIT_ASSERT_UNEQUAL(Complete(engine, {"SELE"}).size(), 0);
}
+ Y_UNIT_TEST(IgnoredTokens) {
+ auto lexer = MakePureLexerSupplier();
+
+ TNameSet names;
+ TFrequencyData frequency;
+ auto service = MakeStaticNameService(names, MakeDefaultRanking(frequency));
+
+ auto config = MakeYQLConfiguration();
+ auto engine = MakeSqlCompletionEngine(lexer, std::move(service), config);
+
+ UNIT_ASSERT(!FindPtr(Complete(engine, {""}), TCandidate{Keyword, "FOR"}));
+ UNIT_ASSERT(!FindPtr(Complete(engine, {""}), TCandidate{Keyword, "PARALLEL"}));
+
+ UNIT_ASSERT(FindPtr(Complete(engine, {"EVALUATE "}), TCandidate{Keyword, "FOR"}));
+ UNIT_ASSERT(FindPtr(Complete(engine, {"EVALUATE "}), TCandidate{Keyword, "FOR"}));
+ UNIT_ASSERT(FindPtr(Complete(engine, {"EVALUATE /**/"}), TCandidate{Keyword, "FOR"}));
+ }
+
Y_UNIT_TEST(CachedSchema) {
TLexerSupplier lexer = MakePureLexerSupplier();
diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
index 790f272db86..3c0dd579e4f 100644
--- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
@@ -34,6 +34,12 @@ namespace NSQLComplete {
return Parser_->getRuleNames().at(rule);
}
+ TTokenId GetTokenId(std::string_view symbolized) const override {
+ TTokenId type = Parser_->getTokenType(symbolized);
+ Y_ENSURE(type != antlr4::Token::INVALID_TYPE, "Not found " << symbolized);
+ return type;
+ }
+
TRuleId GetRuleId(std::string_view symbolized) const override {
TRuleId index = Parser_->getRuleIndex(std::string(symbolized));
if (index == INVALID_INDEX) {
diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.h b/yql/essentials/sql/v1/complete/syntax/grammar.h
index 29b81a78984..7e6bb398439 100644
--- a/yql/essentials/sql/v1/complete/syntax/grammar.h
+++ b/yql/essentials/sql/v1/complete/syntax/grammar.h
@@ -22,6 +22,7 @@ namespace NSQLComplete {
virtual ~ISqlGrammar() = default;
virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0;
virtual const std::string& SymbolizedRule(TRuleId rule) const = 0;
+ virtual TTokenId GetTokenId(std::string_view symbolized) const = 0;
virtual TRuleId GetRuleId(std::string_view symbolized) const = 0;
virtual const std::vector<std::string>& GetAllRules() const = 0;
virtual const std::unordered_set<TTokenId>& GetAllTokens() const = 0;