diff options
author | vitya-smirnov <[email protected]> | 2025-06-17 16:02:41 +0300 |
---|---|---|
committer | vitya-smirnov <[email protected]> | 2025-06-17 16:18:59 +0300 |
commit | 0bc6f0197fa813fec678c75c760564724f591309 (patch) | |
tree | bb40ca81a21ddb608154e56967202c0548708551 /yql/essentials | |
parent | d84e9cf123d8345dc008b9b0feafbb20be9d9784 (diff) |
YQL-19747: Support token ignoring patterns
Added support for token filtering. It is needed because the YQL grammar serves multiple systems with different capabilities. For example, some of them do not support `INTERSECT ALL`, and therefore `ALL` after `INTERSECT` should be *disabled*. Another example is a system that supports only `EVALUATE FOR`, so `FOR` is *forced* to be preceded by `EVALUATE`.
commit_hash:a926c8d3911f8dd025ab58eb7f81e2370fcd9376
Diffstat (limited to 'yql/essentials')
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/global.h | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/local/local.cpp | 49 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/local/local.h | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3i.h | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3t.h | 51 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.cpp | 32 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.h | 16 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete_ut.cpp | 18 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/grammar.cpp | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/grammar.h | 1 |
10 files changed, 161 insertions, 21 deletions
diff --git a/yql/essentials/sql/v1/complete/analysis/global/global.h b/yql/essentials/sql/v1/complete/analysis/global/global.h index fe929bf77c7..1ef1344e3c9 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/global.h +++ b/yql/essentials/sql/v1/complete/analysis/global/global.h @@ -29,6 +29,7 @@ namespace NSQLComplete { TMaybe<TColumnContext> Column; }; + // TODO(YQL-19747): Make it thread-safe to make ISqlCompletionEngine thread-safe. class IGlobalAnalysis { public: using TPtr = THolder<IGlobalAnalysis>; diff --git a/yql/essentials/sql/v1/complete/analysis/local/local.cpp b/yql/essentials/sql/v1/complete/analysis/local/local.cpp index 1b732ae5db7..13e2fce69ad 100644 --- a/yql/essentials/sql/v1/complete/analysis/local/local.cpp +++ b/yql/essentials/sql/v1/complete/analysis/local/local.cpp @@ -51,10 +51,13 @@ namespace NSQLComplete { public: TSpecializedLocalSyntaxAnalysis( - TLexerSupplier lexer, const THashSet<TString>& IgnoredRules) + TLexerSupplier lexer, + const THashSet<TString>& ignoredRules, + const THashMap<TString, THashSet<TString>>& disabledPreviousByToken, + const THashMap<TString, THashSet<TString>>& forcedPreviousByToken) : Grammar_(&GetSqlGrammar()) , Lexer_(lexer(/* ansi = */ IsAnsiLexer)) - , C3_(ComputeC3Config(IgnoredRules)) + , C3_(ComputeC3Config(ignoredRules, disabledPreviousByToken, forcedPreviousByToken)) { } @@ -111,11 +114,16 @@ namespace NSQLComplete { } private: - IC3Engine::TConfig ComputeC3Config(const THashSet<TString>& IgnoredRules) const { + IC3Engine::TConfig ComputeC3Config( + const THashSet<TString>& ignoredRules, + const THashMap<TString, THashSet<TString>>& disabledPreviousByToken, + const THashMap<TString, THashSet<TString>>& forcedPreviousByToken) const { return { .IgnoredTokens = ComputeIgnoredTokens(), .PreferredRules = ComputePreferredRules(), - .IgnoredRules = ComputeIgnoredRules(IgnoredRules), + .IgnoredRules = ComputeIgnoredRules(ignoredRules), + .DisabledPreviousByToken = Resolved(disabledPreviousByToken), + 
.ForcedPreviousByToken = Resolved(forcedPreviousByToken), }; } @@ -143,6 +151,23 @@ namespace NSQLComplete { return ignored; } + std::unordered_map<TTokenId, std::unordered_set<TTokenId>> + Resolved(const THashMap<TString, THashSet<TString>>& tokens) const { + std::unordered_map<TTokenId, std::unordered_set<TTokenId>> resolved; + for (const auto& [name, set] : tokens) { + resolved[Grammar_->GetTokenId(name)] = Resolved(set); + } + return resolved; + } + + std::unordered_set<TTokenId> Resolved(const THashSet<TString>& tokens) const { + std::unordered_set<TTokenId> resolved; + for (const TString& name : tokens) { + resolved.emplace(Grammar_->GetTokenId(name)); + } + return resolved; + } + TC3Candidates C3Complete(TCompletionInput statement, const TCursorTokenContext& context) { auto enclosing = context.Enclosing(); @@ -336,9 +361,12 @@ namespace NSQLComplete { class TLocalSyntaxAnalysis: public ILocalSyntaxAnalysis { public: TLocalSyntaxAnalysis( - TLexerSupplier lexer, const THashSet<TString>& IgnoredRules) - : DefaultEngine_(lexer, IgnoredRules) - , AnsiEngine_(lexer, IgnoredRules) + TLexerSupplier lexer, + const THashSet<TString>& ignoredRules, + const THashMap<TString, THashSet<TString>>& disabledPreviousByToken, + const THashMap<TString, THashSet<TString>>& forcedPreviousByToken) + : DefaultEngine_(lexer, ignoredRules, disabledPreviousByToken, forcedPreviousByToken) + , AnsiEngine_(lexer, ignoredRules, disabledPreviousByToken, forcedPreviousByToken) { } @@ -361,8 +389,11 @@ namespace NSQLComplete { }; ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis( - TLexerSupplier lexer, const THashSet<TString>& IgnoredRules) { - return MakeHolder<TLocalSyntaxAnalysis>(lexer, IgnoredRules); + TLexerSupplier lexer, + const THashSet<TString>& ignoredRules, + const THashMap<TString, THashSet<TString>>& disabledPreviousByToken, + const THashMap<TString, THashSet<TString>>& forcedPreviousByToken) { + return MakeHolder<TLocalSyntaxAnalysis>(lexer, ignoredRules, 
disabledPreviousByToken, forcedPreviousByToken); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/local/local.h b/yql/essentials/sql/v1/complete/analysis/local/local.h index 6cf6fc33c51..cca182aacdb 100644 --- a/yql/essentials/sql/v1/complete/analysis/local/local.h +++ b/yql/essentials/sql/v1/complete/analysis/local/local.h @@ -60,6 +60,7 @@ namespace NSQLComplete { TEditRange EditRange; }; + // TODO(YQL-19747): Make it thread-safe to make ISqlCompletionEngine thread-safe. class ILocalSyntaxAnalysis { public: using TPtr = THolder<ILocalSyntaxAnalysis>; @@ -69,6 +70,9 @@ namespace NSQLComplete { }; ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis( - TLexerSupplier lexer, const THashSet<TString>& IgnoredRules); + TLexerSupplier lexer, + const THashSet<TString>& ignoredRules, + const THashMap<TString, THashSet<TString>>& disabledPreviousByToken, + const THashMap<TString, THashSet<TString>>& forcedPreviousByToken); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/antlr4/c3i.h b/yql/essentials/sql/v1/complete/antlr4/c3i.h index 52920882ee0..b3fcaab7ae0 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3i.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3i.h @@ -37,6 +37,8 @@ namespace NSQLComplete { std::unordered_set<TTokenId> IgnoredTokens; std::unordered_set<TRuleId> PreferredRules; std::unordered_set<TRuleId> IgnoredRules; + std::unordered_map<TTokenId, std::unordered_set<TTokenId>> DisabledPreviousByToken; + std::unordered_map<TTokenId, std::unordered_set<TTokenId>> ForcedPreviousByToken; }; virtual ~IC3Engine() = default; diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h index 2b49068ac8f..d1c80867a59 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3t.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h @@ -13,6 +13,7 @@ #include <util/generic/fwd.h> #include <util/generic/string.h> #include <util/generic/vector.h> +#include 
<util/generic/yexception.h> namespace NSQLComplete { @@ -26,6 +27,8 @@ namespace NSQLComplete { , Parser_(&Tokens_) , CompletionCore_(&Parser_) , IgnoredRules_(std::move(config.IgnoredRules)) + , DisabledPreviousByToken_(std::move(config.DisabledPreviousByToken)) + , ForcedPreviousByToken_(std::move(config.ForcedPreviousByToken)) { Lexer_.removeErrorListeners(); Parser_.removeErrorListeners(); @@ -36,15 +39,29 @@ namespace NSQLComplete { for (TRuleId rule : IgnoredRules_) { CompletionCore_.preferredRules.emplace(rule); } + + PurifyForcedTokens(); } TC3Candidates Complete(TStringBuf text, size_t caretTokenIndex) override { Assign(text); auto candidates = CompletionCore_.collectCandidates(caretTokenIndex); - return Converted(std::move(candidates)); + return Converted(std::move(candidates), caretTokenIndex); } private: + void PurifyForcedTokens() { + for (auto it = ForcedPreviousByToken_.begin(); it != ForcedPreviousByToken_.end();) { + const auto& [token, previous] = *it; + if (previous.empty()) { + CompletionCore_.ignoredTokens.emplace(token); + it = ForcedPreviousByToken_.erase(it); + } else { + it = std::next(it); + } + } + } + void Assign(TStringBuf prefix) { Chars_.load(prefix.Data(), prefix.Size(), /* lenient = */ false); Lexer_.reset(); @@ -52,10 +69,14 @@ namespace NSQLComplete { Tokens_.fill(); } - TC3Candidates Converted(c3::CandidatesCollection candidates) const { + TC3Candidates Converted(c3::CandidatesCollection candidates, size_t caretTokenIndex) { TC3Candidates converted; for (auto& [token, following] : candidates.tokens) { + if (IsIgnored(token, caretTokenIndex)) { + continue; + } + converted.Tokens.emplace_back(token, std::move(following)); } @@ -71,17 +92,43 @@ namespace NSQLComplete { return converted; } + bool IsIgnored(TTokenId token, size_t caretTokenIndex) { + auto previous = PreviousToken(caretTokenIndex); + + auto disabled = DisabledPreviousByToken_.find(token); + auto forced = ForcedPreviousByToken_.find(token); + + return (disabled != 
DisabledPreviousByToken_.end() && disabled->second.contains(previous)) || + (forced != ForcedPreviousByToken_.end() && !forced->second.contains(previous)); + } + bool IsIgnored(TRuleId head, const std::vector<TRuleId> tail) const { return IgnoredRules_.contains(head) || AnyOf(tail, [this](TRuleId r) { return IgnoredRules_.contains(r); }); } + TTokenId PreviousToken(size_t caretTokenIndex) { + ssize_t index = static_cast<ssize_t>(caretTokenIndex) - 1; + while (0 <= index && Tokens_.get(index)->getChannel() == antlr4::Token::HIDDEN_CHANNEL) { + --index; + } + + if (index < 0) { + return antlr4::Token::INVALID_TYPE; + } + + return Tokens_.get(index)->getType(); + } + antlr4::ANTLRInputStream Chars_; G::TLexer Lexer_; antlr4::BufferedTokenStream Tokens_; G::TParser Parser_; c3::CodeCompletionCore CompletionCore_; + std::unordered_set<TRuleId> IgnoredRules_; + std::unordered_map<TTokenId, std::unordered_set<TTokenId>> DisabledPreviousByToken_; + std::unordered_map<TTokenId, std::unordered_set<TTokenId>> ForcedPreviousByToken_; }; } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index 2f61b1fe4df..2ed153f3ecc 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -22,7 +22,11 @@ namespace NSQLComplete { INameService::TPtr names, ISqlCompletionEngine::TConfiguration configuration) : Configuration_(std::move(configuration)) - , SyntaxAnalysis_(MakeLocalSyntaxAnalysis(lexer, Configuration_.IgnoredRules)) + , SyntaxAnalysis_(MakeLocalSyntaxAnalysis( + lexer, + Configuration_.IgnoredRules, + Configuration_.DisabledPreviousByToken, + Configuration_.ForcedPreviousByToken)) , GlobalAnalysis_(MakeGlobalAnalysis()) , Names_(std::move(names)) { @@ -272,17 +276,17 @@ namespace NSQLComplete { } ISqlCompletionEngine::TConfiguration MakeYDBConfiguration() { - return { - .IgnoredRules = { - "use_stmt", - "import_stmt", - "export_stmt", 
- }, + ISqlCompletionEngine::TConfiguration config; + config.IgnoredRules = { + "use_stmt", + "import_stmt", + "export_stmt", }; + return config; } ISqlCompletionEngine::TConfiguration MakeYQLConfiguration() { - return MakeConfiguration(/* allowedStmts = */ { + auto config = MakeConfiguration(/* allowedStmts = */ { "lambda_stmt", "pragma_stmt", "select_stmt", @@ -300,6 +304,18 @@ namespace NSQLComplete { "for_stmt", "values_stmt", }); + + config.DisabledPreviousByToken = {}; + + config.ForcedPreviousByToken = { + {"PARALLEL", {}}, + {"TABLESTORE", {}}, + {"FOR", {"EVALUATE"}}, + {"IF", {"EVALUATE"}}, + {"EXTERNAL", {"USING"}}, + }; + + return config; } ISqlCompletionEngine::TPtr MakeSqlCompletionEngine( diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h index 33ca0c00ac9..90edceacbc4 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.h +++ b/yql/essentials/sql/v1/complete/sql_complete.h @@ -9,13 +9,14 @@ #include <util/generic/string.h> #include <util/generic/vector.h> +#include <util/generic/hash.h> #include <util/generic/hash_set.h> namespace NSQLComplete { struct TCompletedToken { TStringBuf Content; - size_t SourcePosition; + size_t SourcePosition = 0; }; enum class ECandidateKind { @@ -45,18 +46,31 @@ namespace NSQLComplete { TVector<TCandidate> Candidates; }; + // TODO(YQL-19747): Make it thread-safe. 
class ISqlCompletionEngine { public: using TPtr = THolder<ISqlCompletionEngine>; struct TConfiguration { + friend class TSqlCompletionEngine; + friend ISqlCompletionEngine::TConfiguration MakeYDBConfiguration(); + friend ISqlCompletionEngine::TConfiguration MakeYQLConfiguration(); + friend ISqlCompletionEngine::TConfiguration MakeConfiguration(THashSet<TString> allowedStmts); + + public: size_t Limit = 256; + + private: THashSet<TString> IgnoredRules; + THashMap<TString, THashSet<TString>> DisabledPreviousByToken; + THashMap<TString, THashSet<TString>> ForcedPreviousByToken; }; virtual ~ISqlCompletionEngine() = default; + virtual TCompletion Complete(TCompletionInput input, TEnvironment env = {}) = 0; + virtual NThreading::TFuture<TCompletion> // TODO(YQL-19747): Migrate YDB CLI to `Complete` method CompleteAsync(TCompletionInput input, TEnvironment env = {}) = 0; }; diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index d37d34be06b..1f29751d017 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -1319,6 +1319,24 @@ JOIN yt:$cluster_name.test; UNIT_ASSERT_UNEQUAL(Complete(engine, {"SELE"}).size(), 0); } + Y_UNIT_TEST(IgnoredTokens) { + auto lexer = MakePureLexerSupplier(); + + TNameSet names; + TFrequencyData frequency; + auto service = MakeStaticNameService(names, MakeDefaultRanking(frequency)); + + auto config = MakeYQLConfiguration(); + auto engine = MakeSqlCompletionEngine(lexer, std::move(service), config); + + UNIT_ASSERT(!FindPtr(Complete(engine, {""}), TCandidate{Keyword, "FOR"})); + UNIT_ASSERT(!FindPtr(Complete(engine, {""}), TCandidate{Keyword, "PARALLEL"})); + + UNIT_ASSERT(FindPtr(Complete(engine, {"EVALUATE "}), TCandidate{Keyword, "FOR"})); + UNIT_ASSERT(FindPtr(Complete(engine, {"EVALUATE "}), TCandidate{Keyword, "FOR"})); + UNIT_ASSERT(FindPtr(Complete(engine, {"EVALUATE /**/"}), TCandidate{Keyword, "FOR"})); + 
} + Y_UNIT_TEST(CachedSchema) { TLexerSupplier lexer = MakePureLexerSupplier(); diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp index 790f272db86..3c0dd579e4f 100644 --- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp +++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp @@ -34,6 +34,12 @@ namespace NSQLComplete { return Parser_->getRuleNames().at(rule); } + TTokenId GetTokenId(std::string_view symbolized) const override { + TTokenId type = Parser_->getTokenType(symbolized); + Y_ENSURE(type != antlr4::Token::INVALID_TYPE, "Not found " << symbolized); + return type; + } + TRuleId GetRuleId(std::string_view symbolized) const override { TRuleId index = Parser_->getRuleIndex(std::string(symbolized)); if (index == INVALID_INDEX) { diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.h b/yql/essentials/sql/v1/complete/syntax/grammar.h index 29b81a78984..7e6bb398439 100644 --- a/yql/essentials/sql/v1/complete/syntax/grammar.h +++ b/yql/essentials/sql/v1/complete/syntax/grammar.h @@ -22,6 +22,7 @@ namespace NSQLComplete { virtual ~ISqlGrammar() = default; virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0; virtual const std::string& SymbolizedRule(TRuleId rule) const = 0; + virtual TTokenId GetTokenId(std::string_view symbolized) const = 0; virtual TRuleId GetRuleId(std::string_view symbolized) const = 0; virtual const std::vector<std::string>& GetAllRules() const = 0; virtual const std::unordered_set<TTokenId>& GetAllTokens() const = 0; |