YQL-19747 Complete folder, table and cluster names

---
- Related to `YQL-19747`
- On top of https://github.com/ytsaurus/ytsaurus/pull/1253
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/14
- Related to https://github.com/vityaman/ydb/issues/35
- Related to https://github.com/vityaman/ydb/issues/40
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1257
commit_hash:0b842abb27184c88b8177beeea29fb1ea86b7a04
author: vityaman <vityaman.dev@yandex.ru> 2025-05-06 15:49:02 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2025-05-06 16:04:08 +0300
commit: 9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28 (patch)
tree: 561c599fae4ea29b537a6958b65e1b052548edf2 /yql/essentials/sql/v1/complete/syntax
parent: c131e959456f9f9a4adada5623ce3bae4097a8c1 (diff)
download: ydb-9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28.tar.gz
14 files changed, 489 insertions, 188 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp
new file mode 100644
index 00000000000..33aef36847a
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp
@@ -0,0 +1,160 @@
+#include "cursor_token_context.h"
+
+#include <yql/essentials/core/issue/yql_issue.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
+namespace NSQLComplete {
+
+    namespace {
+
+        // Lexes the whole input text into `tokens` and returns the result of NSQLTranslation::Tokenize.
+        // The issues produced by the lexer are collected but not exposed to the caller.
+        bool Tokenize(ILexer::TPtr& lexer, TCompletionInput input, TParsedTokenList& tokens) {
+            NYql::TIssues collected;
+            const bool isTokenized = NSQLTranslation::Tokenize(
+                *lexer, TString(input.Text), /* queryName = */ "",
+                tokens, collected, /* maxErrors = */ 1);
+            return isTokenized;
+        }
+
+        // Locates the cursor relative to the token list, measuring offsets by
+        // summing the sizes of the token contents.
+        TCursor GetCursor(const TParsedTokenList& tokens, size_t cursorPosition) {
+            size_t tokenEnd = 0;
+            for (size_t index = 0; index < tokens.size(); ++index) {
+                if (cursorPosition <= tokenEnd) {
+                    break;
+                }
+
+                tokenEnd += tokens[index].Content.size();
+                if (cursorPosition <= tokenEnd) {
+                    // The cursor is inside this token or exactly at its end; in the
+                    // latter case the following token is the next one.
+                    const bool isAtTokenEnd = (tokenEnd == cursorPosition);
+                    return {
+                        .PrevTokenIndex = index,
+                        .NextTokenIndex = isAtTokenEnd ? index + 1 : index,
+                        .Position = cursorPosition,
+                    };
+                }
+            }
+
+            // No token was reached: the cursor is at offset zero or past all tokens.
+            return {
+                .PrevTokenIndex = Nothing(),
+                .NextTokenIndex = 0,
+                .Position = cursorPosition,
+            };
+        }
+
+        // Returns the start offset of each token: the summed content size of all tokens before it.
+        TVector<size_t> GetTokenPositions(const TParsedTokenList& tokens) {
+            TVector<size_t> offsets;
+            offsets.reserve(tokens.size());
+            for (size_t index = 0, offset = 0; index < tokens.size(); ++index) {
+                offsets.push_back(offset);
+                offset += tokens[index].Content.size();
+            }
+            return offsets;
+        }
+
+    } // namespace
+
+    bool TRichParsedToken::IsLiteral() const { // true when the token name is one of the literal token names below
+        return Base->Name == "STRING_VALUE" ||
+               Base->Name == "DIGITS" || // spelling fixed: it used to be compared against "DIGIGTS"
+               Base->Name == "INTEGER_VALUE" ||
+               Base->Name == "REAL";
+    }
+
+    TRichParsedToken TokenAt(const TCursorTokenContext& context, size_t index) {
+        TRichParsedToken token; // the token at `index` bundled with its index and start offset
+        token.Base = &context.Tokens.at(index);
+        token.Index = index;
+        token.Position = context.TokenPositions.at(index);
+        return token;
+    }
+
+    // Returns the token the cursor is attached to: either the one the cursor is inside of,
+    // or the one right before the cursor when its last character is not a word boundary.
+    TMaybe<TRichParsedToken> TCursorTokenContext::Enclosing() const {
+        if (Tokens.size() == 1) {
+            Y_ENSURE(Tokens[0].Name == "EOF");
+            return Nothing();
+        }
+        if (!Cursor.PrevTokenIndex.Defined()) {
+            return Nothing();
+        }
+
+        const TRichParsedToken previous = TokenAt(*this, *Cursor.PrevTokenIndex);
+        const bool isInside = (Cursor.PrevTokenIndex == Cursor.NextTokenIndex);
+        if (!isInside && IsWordBoundary(previous.Base->Content.back())) {
+            return Nothing();
+        }
+        return previous;
+    }
+
+    // Matches the names of the tokens located before Cursor.NextTokenIndex against `pattern`
+    // (an empty pattern element matches any token) and returns the first matched token.
+    TMaybe<TRichParsedToken> TCursorTokenContext::MatchCursorPrefix(const TVector<TStringBuf>& pattern) const {
+        const size_t prefixSize = Cursor.NextTokenIndex;
+        if (prefixSize < pattern.size()) {
+            return Nothing();
+        }
+        const size_t begin = prefixSize - pattern.size();
+        for (size_t offset = 0; offset < pattern.size(); ++offset) {
+            if (!pattern[offset].empty() && Tokens[begin + offset].Name != pattern[offset]) {
+                return Nothing();
+            }
+        }
+        return TokenAt(*this, begin);
+    }
+
+    bool GetStatement(
+        ILexer::TPtr& lexer,
+        TCompletionInput input,
+        TCompletionInput& output,
+        size_t& output_position) {
+        // Selects the statement under the cursor; `output_position` is its offset within `input.Text`.
+        TVector<TString> statements;
+        NYql::TIssues issues;
+        if (!NSQLTranslationV1::SplitQueryToStatements(
+                TString(input.Text) + ";", lexer,
+                statements, issues, /* file = */ "",
+                /* areBlankSkipped = */ false)) {
+            return false;
+        }
+
+        output_position = 0;
+        for (const auto& statement : statements) {
+            if (input.CursorPosition < output_position + statement.size()) {
+                output = {
+                    .Text = input.Text.SubStr(output_position, statement.size()),
+                    .CursorPosition = input.CursorPosition - output_position,
+                };
+                return true;
+            }
+            output_position += statement.size();
+        }
+        output_position = 0; // the whole input is returned below, so it starts at offset zero
+        output = input;
+        return true;
+    }
+
+    // Tokenizes the input and fills `context` with the tokens, their offsets and the cursor location.
+    bool GetCursorTokenContext(ILexer::TPtr& lexer, TCompletionInput input, TCursorTokenContext& context) {
+        TParsedTokenList parsed;
+        if (!Tokenize(lexer, input, parsed)) {
+            return false; // `context` is left untouched when tokenization fails
+        }
+
+        // Both derived values are computed before the token list is moved away.
+        TCursor cursor = GetCursor(parsed, input.CursorPosition);
+        TVector<size_t> offsets = GetTokenPositions(parsed);
+        context.Tokens = std::move(parsed);
+        context.TokenPositions = std::move(offsets);
+        context.Cursor = cursor;
+        return true;
+    }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h
new file mode 100644
index 00000000000..35d22231e35
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <yql/essentials/sql/v1/complete/core/input.h>
+#include <yql/essentials/sql/v1/complete/text/word.h>
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+#include <util/generic/maybe.h>
+
+namespace NSQLComplete {
+
+    using NSQLTranslation::ILexer;
+    using NSQLTranslation::TParsedToken;
+    using NSQLTranslation::TParsedTokenList;
+
+    struct TCursor { // location of the cursor relative to a token list
+        TMaybe<size_t> PrevTokenIndex = Nothing(); // empty when no token was found for the cursor
+        size_t NextTokenIndex = PrevTokenIndex ? *PrevTokenIndex : 0; // defaults to PrevTokenIndex, or 0 when it is empty
+        size_t Position = 0; // the cursor position this location was computed for
+    };
+
+    struct TRichParsedToken { // a non-owning reference to a parsed token plus its location
+        const TParsedToken* Base = nullptr; // points into TCursorTokenContext::Tokens when built by TokenAt
+        size_t Index = 0; // index of the token in the token list
+        size_t Position = 0; // start offset of the token (sum of the content sizes of the preceding tokens)
+
+        bool IsLiteral() const; // true when Base->Name is one of the literal token names
+    };
+
+    struct TCursorTokenContext { // tokens of an input together with the cursor location among them
+        TParsedTokenList Tokens; // all tokens produced by the lexer for the input
+        TVector<size_t> TokenPositions; // start offset of each token, parallel to Tokens
+        TCursor Cursor; // where the cursor is relative to Tokens
+
+        TMaybe<TRichParsedToken> Enclosing() const; // the token the cursor is attached to, if any
+        TMaybe<TRichParsedToken> MatchCursorPrefix(const TVector<TStringBuf>& pattern) const; // first token of a name pattern matched right before the cursor
+    };
+
+    bool GetStatement(
+        ILexer::TPtr& lexer,
+        TCompletionInput input,
+        TCompletionInput& output,
+        size_t& output_position);
+
+    bool GetCursorTokenContext(
+        ILexer::TPtr& lexer,
+        TCompletionInput input,
+        TCursorTokenContext& context);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp
new file mode 100644
index 00000000000..0e275cca3b8
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp
@@ -0,0 +1,50 @@
+#include "cursor_token_context.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
+using namespace NSQLComplete;
+
+Y_UNIT_TEST_SUITE(CursorTokenContextTests) { // checks the cursor location and the enclosing token detection
+
+    NSQLTranslation::ILexer::TPtr MakeLexer() { // builds a non-ANSI lexer backed by the pure ANTLR4 factory
+        NSQLTranslationV1::TLexers lexers;
+        lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+        return NSQLTranslationV1::MakeLexer(
+            lexers, /* ansi = */ false, /* antlr4 = */ true,
+            NSQLTranslationV1::ELexerFlavor::Pure);
+    }
+
+    TCursorTokenContext Context(TString input) { // NOTE(review): presumably '#' marks the cursor for SharpedInput - confirm
+        auto lexer = MakeLexer();
+        TCursorTokenContext context;
+        UNIT_ASSERT(GetCursorTokenContext(lexer, SharpedInput(input), context));
+        return context;
+    }
+
+    Y_UNIT_TEST(Empty) { // an empty input has no previous token and no enclosing token
+        auto context = Context("");
+        UNIT_ASSERT(context.Cursor.PrevTokenIndex.Empty());
+        UNIT_ASSERT_VALUES_EQUAL(context.Cursor.NextTokenIndex, 0);
+        UNIT_ASSERT_VALUES_EQUAL(context.Cursor.Position, 0);
+        UNIT_ASSERT(context.Enclosing().Empty());
+    }
+
+    Y_UNIT_TEST(Blank) { // a cursor surrounded only by spaces has no enclosing token
+        UNIT_ASSERT(Context("# ").Enclosing().Empty());
+        UNIT_ASSERT(Context(" #").Enclosing().Empty());
+        UNIT_ASSERT(Context(" # ").Enclosing().Empty());
+    }
+
+    Y_UNIT_TEST(Enclosing) { // expectations for plain words, backtick-quoted and double-quoted text
+        UNIT_ASSERT(Context("se#").Enclosing().Defined());
+        UNIT_ASSERT(Context("#se").Enclosing().Empty());
+        UNIT_ASSERT(Context("`se`#").Enclosing().Empty());
+        UNIT_ASSERT(Context("#`se`").Enclosing().Empty());
+        UNIT_ASSERT(Context("`se`#`se`").Enclosing().Defined());
+        UNIT_ASSERT(Context("\"se\"#\"se\"").Enclosing().Empty());
+    }
+
+} // Y_UNIT_TEST_SUITE(CursorTokenContextTests)
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp
index 1c9f146c923..43c36aea9dd 100644
--- a/yql/essentials/sql/v1/complete/syntax/format.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/format.cpp
@@ -35,4 +35,17 @@ namespace NSQLComplete {
         return text;
     }
 
+    TString Quoted(TString content) { // surrounds the content with a pair of backticks
+        content.append('`');
+        content.prepend('`');
+        return content;
+    }
+
+    TString Unquoted(TString content) { // drops the surrounding backticks; Y_ENSURE fails when they are absent
+        Y_ENSURE(2 <= content.size() && content.front() == '`' && content.back() == '`');
+        content.pop_back();
+        content.erase(0, 1);
+        return content;
+    }
+
 } // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h
index 6c2f1b72ac2..58e5d1f1e4a 100644
--- a/yql/essentials/sql/v1/complete/syntax/format.h
+++ b/yql/essentials/sql/v1/complete/syntax/format.h
@@ -6,5 +6,7 @@
 namespace NSQLComplete {
 
     TString FormatKeywords(const TVector<TString>& seq);
+    TString Quoted(TString content);
+    TString Unquoted(TString content);
 
 } // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
index 252deaf682c..c080fae5ae4 100644
--- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
@@ -7,31 +7,31 @@ namespace NSQLComplete {
     class TSqlGrammar: public ISqlGrammar {
     public:
         TSqlGrammar(const NSQLReflect::TLexerGrammar& grammar)
-            : Parser(MakeDummyParser())
-            , AllTokens(ComputeAllTokens())
-            , KeywordTokens(ComputeKeywordTokens(grammar))
-            , PunctuationTokens(ComputePunctuationTokens(grammar))
+            : Parser_(MakeDummyParser())
+            , AllTokens_(ComputeAllTokens())
+            , KeywordTokens_(ComputeKeywordTokens(grammar))
+            , PunctuationTokens_(ComputePunctuationTokens(grammar))
         {
         }
 
         const antlr4::dfa::Vocabulary& GetVocabulary() const override {
-            return Parser->getVocabulary();
+            return Parser_->getVocabulary();
         }
 
         const std::unordered_set<TTokenId>& GetAllTokens() const override {
-            return AllTokens;
+            return AllTokens_;
         }
 
         const std::unordered_set<TTokenId>& GetKeywordTokens() const override {
-            return KeywordTokens;
+            return KeywordTokens_;
         }
 
         const std::unordered_set<TTokenId>& GetPunctuationTokens() const override {
-            return PunctuationTokens;
+            return PunctuationTokens_;
         }
 
         const std::string& SymbolizedRule(TRuleId rule) const override {
-            return Parser->getRuleNames().at(rule);
+            return Parser_->getRuleNames().at(rule);
         }
 
     private:
@@ -76,10 +76,10 @@ namespace NSQLComplete {
             return punctuationTokens;
         }
 
-        const THolder<antlr4::Parser> Parser;
-        const std::unordered_set<TTokenId> AllTokens;
-        const std::unordered_set<TTokenId> KeywordTokens;
-        const std::unordered_set<TTokenId> PunctuationTokens;
+        const THolder<antlr4::Parser> Parser_;
+        const std::unordered_set<TTokenId> AllTokens_;
+        const std::unordered_set<TTokenId> KeywordTokens_;
+        const std::unordered_set<TTokenId> PunctuationTokens_;
     };
 
     const ISqlGrammar& GetSqlGrammar() {
diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp
index c434fa28daf..549208d4cab 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/local.cpp
@@ -1,9 +1,10 @@
 #include "local.h"
 
 #include "ansi.h"
+#include "cursor_token_context.h"
+#include "format.h"
 #include "grammar.h"
 #include "parser_call_stack.h"
-#include "token.h"
 
 #include <yql/essentials/sql/v1/complete/antlr4/c3i.h>
 #include <yql/essentials/sql/v1/complete/antlr4/c3t.h>
@@ -49,65 +50,77 @@ namespace NSQLComplete {
 
     public:
         explicit TSpecializedLocalSyntaxAnalysis(TLexerSupplier lexer)
-            : Grammar(&GetSqlGrammar())
+            : Grammar_(&GetSqlGrammar())
             , Lexer_(lexer(/* ansi = */ IsAnsiLexer))
-            , C3(ComputeC3Config())
+            , C3_(ComputeC3Config())
         {
         }
 
         TLocalSyntaxContext Analyze(TCompletionInput input) override {
             TCompletionInput statement;
-            if (!GetStatement(Lexer_, input, statement)) {
+            size_t statement_position;
+            if (!GetStatement(Lexer_, input, statement, statement_position)) {
                 return {};
             }
 
-            auto candidates = C3.Complete(statement);
-
-            TParsedTokenList tokens;
-            TCaretTokenPosition caret;
-            if (!TokenizePrefix(statement, tokens, caret)) {
+            TCursorTokenContext context;
+            if (!GetCursorTokenContext(Lexer_, statement, context)) {
                 return {};
             }
 
-            if (IsCaretEnslosed(tokens, caret)) {
-                return {};
+            TC3Candidates candidates = C3_.Complete(statement);
+
+            TLocalSyntaxContext result;
+
+            result.EditRange = EditRange(context);
+            result.EditRange.Begin += statement_position;
+
+            if (auto enclosing = context.Enclosing()) {
+                if (enclosing->IsLiteral()) {
+                    return result;
+                } else if (enclosing->Base->Name == "ID_QUOTED") {
+                    result.Object = ObjectMatch(context, candidates);
+                    return result;
+                }
             }
 
-            return {
-                .Keywords = SiftedKeywords(candidates),
-                .Pragma = PragmaMatch(tokens, candidates),
-                .IsTypeName = IsTypeNameMatched(candidates),
-                .Function = FunctionMatch(tokens, candidates),
-                .Hint = HintMatch(candidates),
-            };
+            result.Keywords = SiftedKeywords(candidates);
+            result.Pragma = PragmaMatch(context, candidates);
+            result.Type = TypeMatch(candidates);
+            result.Function = FunctionMatch(context, candidates);
+            result.Hint = HintMatch(candidates);
+            result.Object = ObjectMatch(context, candidates);
+            result.Cluster = ClusterMatch(context, candidates);
+
+            return result;
         }
 
     private:
-        IC3Engine::TConfig ComputeC3Config() {
+        IC3Engine::TConfig ComputeC3Config() const {
             return {
                 .IgnoredTokens = ComputeIgnoredTokens(),
                 .PreferredRules = ComputePreferredRules(),
             };
         }
 
-        std::unordered_set<TTokenId> ComputeIgnoredTokens() {
-            auto ignoredTokens = Grammar->GetAllTokens();
-            for (auto keywordToken : Grammar->GetKeywordTokens()) {
+        std::unordered_set<TTokenId> ComputeIgnoredTokens() const {
+            auto ignoredTokens = Grammar_->GetAllTokens();
+            for (auto keywordToken : Grammar_->GetKeywordTokens()) {
                 ignoredTokens.erase(keywordToken);
             }
-            for (auto punctuationToken : Grammar->GetPunctuationTokens()) {
+            for (auto punctuationToken : Grammar_->GetPunctuationTokens()) {
                 ignoredTokens.erase(punctuationToken);
             }
             return ignoredTokens;
         }
 
-        std::unordered_set<TRuleId> ComputePreferredRules() {
+        std::unordered_set<TRuleId> ComputePreferredRules() const {
             return GetC3PreferredRules();
         }
 
-        TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) {
-            const auto& vocabulary = Grammar->GetVocabulary();
-            const auto& keywordTokens = Grammar->GetKeywordTokens();
+        TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) const {
+            const auto& vocabulary = Grammar_->GetVocabulary();
+            const auto& keywordTokens = Grammar_->GetKeywordTokens();
 
             TLocalSyntaxContext::TKeywords keywords;
             for (const auto& token : candidates.Tokens) {
@@ -122,40 +135,41 @@ namespace NSQLComplete {
         }
 
         TMaybe<TLocalSyntaxContext::TPragma> PragmaMatch(
-            const TParsedTokenList& tokens, const TC3Candidates& candidates) {
+            const TCursorTokenContext& context, const TC3Candidates& candidates) const {
             if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) {
                 return Nothing();
             }
 
             TLocalSyntaxContext::TPragma pragma;
-            if (EndsWith(tokens, {"ID_PLAIN", "DOT"})) {
-                pragma.Namespace = tokens[tokens.size() - 2].Content;
-            } else if (EndsWith(tokens, {"ID_PLAIN", "DOT", ""})) {
-                pragma.Namespace = tokens[tokens.size() - 3].Content;
+
+            if (TMaybe<TRichParsedToken> begin;
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) ||
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) {
+                pragma.Namespace = begin->Base->Content;
             }
             return pragma;
         }
 
-        bool IsTypeNameMatched(const TC3Candidates& candidates) {
+        bool TypeMatch(const TC3Candidates& candidates) const {
             return AnyOf(candidates.Rules, RuleAdapted(IsLikelyTypeStack));
         }
 
         TMaybe<TLocalSyntaxContext::TFunction> FunctionMatch(
-            const TParsedTokenList& tokens, const TC3Candidates& candidates) {
+            const TCursorTokenContext& context, const TC3Candidates& candidates) const {
             if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyFunctionStack))) {
                 return Nothing();
             }
 
             TLocalSyntaxContext::TFunction function;
-            if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE"})) {
-                function.Namespace = tokens[tokens.size() - 2].Content;
-            } else if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE", ""})) {
-                function.Namespace = tokens[tokens.size() - 3].Content;
+            if (TMaybe<TRichParsedToken> begin;
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE"})) ||
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE", ""}))) {
+                function.Namespace = begin->Base->Content;
             }
             return function;
         }
 
-        TMaybe<TLocalSyntaxContext::THint> HintMatch(const TC3Candidates& candidates) {
+        TMaybe<TLocalSyntaxContext::THint> HintMatch(const TC3Candidates& candidates) const {
             // TODO(YQL-19747): detect local contexts with a single iteration through the candidates.Rules
             auto rule = FindIf(candidates.Rules, RuleAdapted(IsLikelyHintStack));
             if (rule == std::end(candidates.Rules)) {
@@ -172,45 +186,103 @@ namespace NSQLComplete {
             };
         }
 
-        bool TokenizePrefix(TCompletionInput input, TParsedTokenList& tokens, TCaretTokenPosition& caret) {
-            NYql::TIssues issues;
-            if (!NSQLTranslation::Tokenize(
-                    *Lexer_, TString(input.Text), /* queryName = */ "",
-                    tokens, issues, /* maxErrors = */ 1)) {
-                return false;
+        TMaybe<TLocalSyntaxContext::TObject> ObjectMatch(
+            const TCursorTokenContext& context, const TC3Candidates& candidates) const {
+            TLocalSyntaxContext::TObject object;
+
+            if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyObjectRefStack))) {
+                object.Kinds.emplace(EObjectKind::Folder);
+            }
+
+            if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyExistingTableStack))) {
+                object.Kinds.emplace(EObjectKind::Folder);
+                object.Kinds.emplace(EObjectKind::Table);
+            }
+
+            if (object.Kinds.empty()) {
+                return Nothing();
             }
 
-            Y_ENSURE(!tokens.empty() && tokens.back().Name == "EOF");
-            tokens.pop_back();
+            if (TMaybe<TRichParsedToken> begin;
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) ||
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) {
+                object.Cluster = begin->Base->Content;
+            }
 
-            caret = CaretTokenPosition(tokens, input.CursorPosition);
-            tokens.crop(caret.NextTokenIndex + 1);
-            return true;
+            if (TMaybe<TRichParsedToken> begin;
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT"})) ||
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT", ""}))) {
+                object.Provider = begin->Base->Content;
+            }
+
+            if (auto path = ObjectPath(context)) {
+                object.Path = *path;
+                object.IsEnclosed = true;
+            }
+
+            return object;
+        }
+
+        TMaybe<TString> ObjectPath(const TCursorTokenContext& context) const { // enclosing token text before the cursor
+            auto enclosing = context.Enclosing();
+            if (!enclosing) {
+                return Nothing();
+            }
+            const bool isQuoted = (enclosing->Base->Name == "ID_QUOTED");
+            TString path = isQuoted ? Unquoted(enclosing->Base->Content) : enclosing->Base->Content;
+            const size_t length = context.Cursor.Position - enclosing->Position - (isQuoted ? 1 : 0); // only a quoted token has an opening backtick to skip
+            path.resize(length < path.size() ? length : path.size()); // truncate only, never grow the path
+            return path;
+        }
 
-        bool IsCaretEnslosed(const TParsedTokenList& tokens, TCaretTokenPosition caret) {
-            if (tokens.empty() || caret.PrevTokenIndex != caret.NextTokenIndex) {
-                return false;
+        TMaybe<TLocalSyntaxContext::TCluster> ClusterMatch(
+            const TCursorTokenContext& context, const TC3Candidates& candidates) const {
+            if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyClusterStack))) {
+                return Nothing();
             }
 
-            const auto& token = tokens.back();
-            return token.Name == "STRING_VALUE" ||
-                   token.Name == "ID_QUOTED" ||
-                   token.Name == "DIGIGTS" ||
-                   token.Name == "INTEGER_VALUE" ||
-                   token.Name == "REAL";
+            TLocalSyntaxContext::TCluster cluster;
+            if (TMaybe<TRichParsedToken> begin;
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON"})) ||
+                (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", ""}))) {
+                cluster.Provider = begin->Base->Content;
+            }
+            return cluster;
+        }
+
+        TEditRange EditRange(const TCursorTokenContext& context) const { // range ending at the cursor
+            auto enclosing = context.Enclosing();
+            if (enclosing.Empty()) { // no enclosing token: an empty range located at the cursor
+                TEditRange range;
+                range.Begin = context.Cursor.Position;
+                range.Length = 0;
+                return range;
+            }
+            return EditRange(*enclosing, context.Cursor);
+        }
+
+        TEditRange EditRange(const TRichParsedToken& token, const TCursor& cursor) const {
+            // The range spans from the token start to the cursor; for a NOT_EQUALS2
+            // token it starts one character later.
+            const size_t shift = (token.Base->Name == "NOT_EQUALS2") ? 1 : 0;
+            const size_t start = token.Position + shift;
+
+            TEditRange range;
+            range.Begin = start;
+            range.Length = cursor.Position - start;
+            return range;
+        }
 
-        const ISqlGrammar* Grammar;
+        const ISqlGrammar* Grammar_;
         NSQLTranslation::ILexer::TPtr Lexer_;
-        TC3Engine<G> C3;
+        TC3Engine<G> C3_;
     };
 
     class TLocalSyntaxAnalysis: public ILocalSyntaxAnalysis {
     public:
         explicit TLocalSyntaxAnalysis(TLexerSupplier lexer)
-            : DefaultEngine(lexer)
-            , AnsiEngine(lexer)
+            : DefaultEngine_(lexer)
+            , AnsiEngine_(lexer)
         {
         }
 
@@ -223,13 +295,13 @@ namespace NSQLComplete {
     private:
         ILocalSyntaxAnalysis& GetSpecializedEngine(bool isAnsiLexer) {
             if (isAnsiLexer) {
-                return AnsiEngine;
+                return AnsiEngine_;
             }
-            return DefaultEngine;
+            return DefaultEngine_;
         }
 
-        TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ false> DefaultEngine;
-        TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ true> AnsiEngine;
+        TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ false> DefaultEngine_;
+        TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ true> AnsiEngine_;
     };
 
     ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis(TLexerSupplier lexer) {
diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h
index d58b62c62cd..8f88d5aa71c 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.h
+++ b/yql/essentials/sql/v1/complete/syntax/local.h
@@ -1,15 +1,22 @@
 #pragma once
 
+#include <yql/essentials/sql/v1/complete/core/name.h>
 #include <yql/essentials/sql/v1/complete/sql_complete.h>
 
 #include <yql/essentials/sql/v1/lexer/lexer.h>
 
 #include <util/generic/string.h>
 #include <util/generic/hash.h>
+#include <util/generic/hash_set.h>
 #include <util/generic/maybe.h>
 
 namespace NSQLComplete {
 
+    struct TEditRange { // a span of the input text given by its start and size
+        size_t Begin = 0; // offset of the first character of the range
+        size_t Length = 0; // number of characters in the range, zero for an empty range
+    };
+
     struct TLocalSyntaxContext {
         using TKeywords = THashMap<TString, TVector<TString>>;
 
@@ -25,11 +32,26 @@ namespace NSQLComplete {
             EStatementKind StatementKind;
         };
 
+        struct TCluster {
+            TString Provider;
+        };
+
+        struct TObject {
+            TString Provider;
+            TString Cluster;
+            TString Path;
+            THashSet<EObjectKind> Kinds;
+            bool IsEnclosed = false;
+        };
+
         TKeywords Keywords;
         TMaybe<TPragma> Pragma;
-        bool IsTypeName = false;
+        bool Type = false;
         TMaybe<TFunction> Function;
         TMaybe<THint> Hint;
+        TMaybe<TObject> Object;
+        TMaybe<TCluster> Cluster;
+        TEditRange EditRange;
     };
 
     class ILocalSyntaxAnalysis {
diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp
index 938483438b1..ce6c94306d4 100644
--- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp
@@ -13,7 +13,7 @@
 
 namespace NSQLComplete {
 
-    const TVector<TRuleId> KeywordRules = {
+    const TVector<TRuleId> PreferredRules = {
         RULE(Keyword),
         RULE(Keyword_expr_uncompat),
         RULE(Keyword_table_uncompat),
@@ -24,27 +24,13 @@ namespace NSQLComplete {
         RULE(Keyword_hint_uncompat),
         RULE(Keyword_as_compat),
         RULE(Keyword_compat),
-    };
-
-    const TVector<TRuleId> PragmaNameRules = {
-        RULE(Opt_id_prefix_or_type),
-        RULE(An_id),
-    };
-
-    const TVector<TRuleId> TypeNameRules = {
-        RULE(Type_name_simple),
         RULE(An_id_or_type),
-    };
-
-    const TVector<TRuleId> FunctionNameRules = {
+        RULE(An_id),
         RULE(Id_expr),
-        RULE(An_id_or_type),
         RULE(Id_or_type),
-    };
-
-    const TVector<TRuleId> HintNameRules = {
         RULE(Id_hint),
-        RULE(An_id),
+        RULE(Opt_id_prefix_or_type),
+        RULE(Type_name_simple),
     };
 
     TVector<std::string> Symbolized(const TParserCallStack& stack) {
@@ -101,6 +87,26 @@ namespace NSQLComplete {
                Contains({RULE(External_call_param), RULE(An_id)}, stack);
     }
 
+    bool IsLikelyObjectRefStack(const TParserCallStack& stack) { // checks the stack for the Object_ref rule via Contains
+        return Contains({RULE(Object_ref)}, stack);
+    }
+
+    bool IsLikelyExistingTableStack(const TParserCallStack& stack) {
+        // NOTE(review): presumably a table reference under CREATE TABLE names a table being created - confirm.
+        if (Contains({RULE(Create_table_stmt), RULE(Simple_table_ref)}, stack)) {
+            return false;
+        }
+
+        return Contains({RULE(Simple_table_ref),
+                         RULE(Simple_table_ref_core),
+                         RULE(Object_ref)}, stack) ||
+               Contains({RULE(Single_source), RULE(Table_ref), RULE(Table_key), RULE(Id_table_or_type)}, stack);
+    }
+
+    bool IsLikelyClusterStack(const TParserCallStack& stack) { // checks the stack for the Cluster_expr rule via Contains
+        return Contains({RULE(Cluster_expr)}, stack);
+    }
+
     TMaybe<EStatementKind> StatementKindOf(const TParserCallStack& stack) {
         for (TRuleId rule : std::ranges::views::reverse(stack)) {
             if (rule == RULE(Process_core) || rule == RULE(Reduce_core) || rule == RULE(Select_core)) {
@@ -115,10 +121,7 @@ namespace NSQLComplete {
 
     std::unordered_set<TRuleId> GetC3PreferredRules() {
         std::unordered_set<TRuleId> preferredRules;
-        preferredRules.insert(std::begin(KeywordRules), std::end(KeywordRules));
-        preferredRules.insert(std::begin(PragmaNameRules), std::end(PragmaNameRules));
-        preferredRules.insert(std::begin(TypeNameRules), std::end(TypeNameRules));
-        preferredRules.insert(std::begin(FunctionNameRules), std::end(FunctionNameRules));
+        preferredRules.insert(std::begin(PreferredRules), std::end(PreferredRules));
         return preferredRules;
     }
 
diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h
index d185b72d628..d44b824a05e 100644
--- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h
+++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h
@@ -15,6 +15,12 @@ namespace NSQLComplete {
 
     bool IsLikelyHintStack(const TParserCallStack& stack);
 
+    bool IsLikelyObjectRefStack(const TParserCallStack& stack); // stack contains the Object_ref rule
+
+    bool IsLikelyExistingTableStack(const TParserCallStack& stack); // table-reference patterns, minus the Create_table_stmt one
+
+    bool IsLikelyClusterStack(const TParserCallStack& stack); // stack contains the Cluster_expr rule
+
     TMaybe<EStatementKind> StatementKindOf(const TParserCallStack& stack);
 
     std::unordered_set<TRuleId> GetC3PreferredRules();
diff --git a/yql/essentials/sql/v1/complete/syntax/token.cpp b/yql/essentials/sql/v1/complete/syntax/token.cpp
deleted file mode 100644
index b8aee3211c6..00000000000
--- a/yql/essentials/sql/v1/complete/syntax/token.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-#include "token.h"
-
-#include <yql/essentials/core/issue/yql_issue.h>
-#include <yql/essentials/sql/v1/lexer/lexer.h>
-
-namespace NSQLComplete {
-
-    bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output) {
-        TVector<TString> statements;
-        NYql::TIssues issues;
-        if (!NSQLTranslationV1::SplitQueryToStatements(
-                TString(input.Text) + ";", lexer,
-                statements, issues, /* file = */ "",
-                /* areBlankSkipped = */ false)) {
-            return false;
-        }
-
-        size_t cursor = 0;
-        for (const auto& statement : statements) {
-            if (input.CursorPosition < cursor + statement.size()) {
-                output = {
-                    .Text = input.Text.SubStr(cursor, statement.size()),
-                    .CursorPosition = input.CursorPosition - cursor,
-                };
-                return true;
-            }
-            cursor += statement.size();
-        }
-
-        output = input;
-        return true;
-    }
-
-    TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition) {
-        size_t cursor = 0;
-        for (size_t i = 0; i < tokens.size(); ++i) {
-            const auto& content = tokens[i].Content;
-            cursor += content.size();
-            if (cursorPosition < cursor) {
-                return {i, i};
-            } else if (cursorPosition == cursor && IsWordBoundary(content.back())) {
-                return {i, i + 1};
-            }
-        }
-        return {std::max(tokens.size(), static_cast<size_t>(1)) - 1, tokens.size()};
-    }
-
-    bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern) {
-        if (tokens.size() < pattern.size()) {
-            return false;
-        }
-        for (yssize_t i = tokens.ysize() - 1, j = pattern.ysize() - 1; 0 <= j; --i, --j) {
-            if (!pattern[j].empty() && tokens[i].Name != pattern[j]) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/token.h b/yql/essentials/sql/v1/complete/syntax/token.h
deleted file mode 100644
index d1e215285a9..00000000000
--- a/yql/essentials/sql/v1/complete/syntax/token.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#pragma once
-
-#include <yql/essentials/sql/v1/complete/core/input.h>
-#include <yql/essentials/sql/v1/complete/text/word.h>
-
-#include <yql/essentials/parser/lexer_common/lexer.h>
-
-namespace NSQLComplete {
-
-    using NSQLTranslation::TParsedTokenList;
-
-    // `PrevTokenIndex` = `NextTokenIndex`, iff caret is enclosed
-    struct TCaretTokenPosition {
-        size_t PrevTokenIndex;
-        size_t NextTokenIndex;
-    };
-
-    bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output);
-
-    TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition);
-
-    bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern);
-
-} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/ut/ya.make b/yql/essentials/sql/v1/complete/syntax/ut/ya.make
index e070185af9f..7e682c5bac0 100644
--- a/yql/essentials/sql/v1/complete/syntax/ut/ya.make
+++ b/yql/essentials/sql/v1/complete/syntax/ut/ya.make
@@ -2,6 +2,11 @@ UNITTEST_FOR(yql/essentials/sql/v1/complete/syntax)
 
 SRCS(
     grammar_ut.cpp
+    cursor_token_context_ut.cpp
+)
+
+PEERDIR(
+    yql/essentials/sql/v1/lexer/antlr4_pure
 )
 
 END()
diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make
index 9e2e908454b..7f63e5b2374 100644
--- a/yql/essentials/sql/v1/complete/syntax/ya.make
+++ b/yql/essentials/sql/v1/complete/syntax/ya.make
@@ -2,11 +2,11 @@ LIBRARY()
 
 SRCS(
     ansi.cpp
+    cursor_token_context.cpp
     format.cpp
     grammar.cpp
     local.cpp
     parser_call_stack.cpp
-    token.cpp
 )
 
 ADDINCL(
@@ -21,6 +21,8 @@ PEERDIR(
     yql/essentials/sql/settings
     yql/essentials/sql/v1/lexer
     yql/essentials/sql/v1/reflect
+    yql/essentials/sql/v1/complete/core
+    yql/essentials/sql/v1/complete/text
 )
 
 END()
author	vityaman <vityaman.dev@yandex.ru>	2025-05-06 15:49:02 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2025-05-06 16:04:08 +0300
commit	9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28 (patch)
tree	561c599fae4ea29b537a6958b65e1b052548edf2 /yql/essentials/sql/v1/complete/syntax
parent	c131e959456f9f9a4adada5623ce3bae4097a8c1 (diff)
download	ydb-9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28.tar.gz