aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/complete/syntax
diff options
context:
space:
mode:
authorvityaman <vityaman.dev@yandex.ru>2025-05-06 15:49:02 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2025-05-06 16:04:08 +0300
commit9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28 (patch)
tree561c599fae4ea29b537a6958b65e1b052548edf2 /yql/essentials/sql/v1/complete/syntax
parentc131e959456f9f9a4adada5623ce3bae4097a8c1 (diff)
downloadydb-9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28.tar.gz
YQL-19747 Complete folder, table and cluster names
---

- Related to `YQL-19747`
- On top of https://github.com/ytsaurus/ytsaurus/pull/1253
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/14
- Related to https://github.com/vityaman/ydb/issues/35
- Related to https://github.com/vityaman/ydb/issues/40

---

Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1257
commit_hash:0b842abb27184c88b8177beeea29fb1ea86b7a04
Diffstat (limited to 'yql/essentials/sql/v1/complete/syntax')
-rw-r--r--yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp160
-rw-r--r--yql/essentials/sql/v1/complete/syntax/cursor_token_context.h50
-rw-r--r--yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp50
-rw-r--r--yql/essentials/sql/v1/complete/syntax/format.cpp13
-rw-r--r--yql/essentials/sql/v1/complete/syntax/format.h2
-rw-r--r--yql/essentials/sql/v1/complete/syntax/grammar.cpp26
-rw-r--r--yql/essentials/sql/v1/complete/syntax/local.cpp206
-rw-r--r--yql/essentials/sql/v1/complete/syntax/local.h24
-rw-r--r--yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp47
-rw-r--r--yql/essentials/sql/v1/complete/syntax/parser_call_stack.h6
-rw-r--r--yql/essentials/sql/v1/complete/syntax/token.cpp60
-rw-r--r--yql/essentials/sql/v1/complete/syntax/token.h24
-rw-r--r--yql/essentials/sql/v1/complete/syntax/ut/ya.make5
-rw-r--r--yql/essentials/sql/v1/complete/syntax/ya.make4
14 files changed, 489 insertions, 188 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp
new file mode 100644
index 00000000000..33aef36847a
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp
@@ -0,0 +1,160 @@
+#include "cursor_token_context.h"
+
+#include <yql/essentials/core/issue/yql_issue.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
+namespace NSQLComplete {
+
+ namespace {
+
+ // Runs the lexer over the whole input text and stores the produced tokens in `tokens`.
+ // Lexer issues are collected into a local list and dropped; only success/failure is reported.
+ bool Tokenize(ILexer::TPtr& lexer, TCompletionInput input, TParsedTokenList& tokens) {
+     NYql::TIssues ignoredIssues;
+     const TString text(input.Text);
+     return NSQLTranslation::Tokenize(
+         *lexer, text, /* queryName = */ "",
+         tokens, ignoredIssues, /* maxErrors = */ 1);
+ }
+
+ // Locates the cursor among the tokens by accumulating token content sizes.
+ // The first token whose end offset reaches `cursorPosition` becomes the previous token;
+ // the next token index equals it while the cursor is strictly inside that token and
+ // is one past it when the cursor sits exactly at the token end.
+ // If no such token exists (including `cursorPosition == 0`), the previous token is empty
+ // and the next token index is 0.
+ TCursor GetCursor(const TParsedTokenList& tokens, size_t cursorPosition) {
+     TCursor cursor = {
+         .PrevTokenIndex = Nothing(),
+         .NextTokenIndex = 0,
+         .Position = cursorPosition,
+     };
+
+     if (cursorPosition == 0) {
+         return cursor;
+     }
+
+     size_t tokenEnd = 0;
+     for (size_t index = 0; index < tokens.size(); ++index) {
+         tokenEnd += tokens[index].Content.size();
+         if (cursorPosition <= tokenEnd) {
+             cursor.PrevTokenIndex = index;
+             cursor.NextTokenIndex = (tokenEnd == cursorPosition) ? index + 1 : index;
+             break;
+         }
+     }
+
+     return cursor;
+ }
+
+ // Computes the start offset of every token as the running sum of the
+ // content sizes of the tokens before it.
+ TVector<size_t> GetTokenPositions(const TParsedTokenList& tokens) {
+     TVector<size_t> offsets;
+     offsets.reserve(tokens.size());
+
+     size_t offset = 0;
+     for (size_t i = 0; i < tokens.size(); ++i) {
+         offsets.emplace_back(offset);
+         offset += tokens[i].Content.size();
+     }
+
+     return offsets;
+ }
+
+ } // namespace
+
+ // Tells whether the token is a string or numeric literal, judging by its lexer token name.
+ bool TRichParsedToken::IsLiteral() const {
+     const auto& name = Base->Name;
+     return name == "STRING_VALUE" ||
+            name == "DIGITS" || // was misspelled as "DIGIGTS", which matches no lexer token
+            name == "INTEGER_VALUE" ||
+            name == "REAL";
+ }
+
+ // Bundles the token at `index` with its index and start offset.
+ // Uses bounds-checked access on both the token list and the position list.
+ TRichParsedToken TokenAt(const TCursorTokenContext& context, size_t index) {
+     TRichParsedToken token;
+     token.Base = &context.Tokens.at(index);
+     token.Index = index;
+     token.Position = context.TokenPositions.at(index);
+     return token;
+ }
+
+ // Returns the token the cursor belongs to, if any.
+ // The previous token qualifies when the cursor is strictly inside it, or when the
+ // cursor is at its end and its last character is not a word boundary.
+ TMaybe<TRichParsedToken> TCursorTokenContext::Enclosing() const {
+     // A single-token list is expected to hold only the EOF token.
+     if (Tokens.size() == 1) {
+         Y_ENSURE(Tokens[0].Name == "EOF");
+         return Nothing();
+     }
+
+     if (Cursor.PrevTokenIndex.Empty()) {
+         return Nothing();
+     }
+
+     const auto previous = TokenAt(*this, *Cursor.PrevTokenIndex);
+     const bool isInsideToken = (*Cursor.PrevTokenIndex == Cursor.NextTokenIndex);
+     if (!isInsideToken && IsWordBoundary(previous.Base->Content.back())) {
+         return Nothing();
+     }
+
+     return previous;
+ }
+
+ // Matches the names of the tokens that end right before `Cursor.NextTokenIndex`
+ // against `pattern`. An empty pattern entry matches any token.
+ // On success returns the token aligned with the first pattern entry.
+ TMaybe<TRichParsedToken> TCursorTokenContext::MatchCursorPrefix(const TVector<TStringBuf>& pattern) const {
+     const size_t prefixSize = Cursor.NextTokenIndex;
+     if (prefixSize < pattern.size()) {
+         return Nothing();
+     }
+
+     const size_t first = prefixSize - pattern.size();
+     for (size_t offset = 0; offset < pattern.size(); ++offset) {
+         const TStringBuf expected = pattern[offset];
+         if (expected.empty()) {
+             continue;
+         }
+         if (Tokens[first + offset].Name != expected) {
+             return Nothing();
+         }
+     }
+
+     return TokenAt(*this, first);
+ }
+
+ // Splits the query into statements and selects the one that contains the cursor.
+ //
+ // On success `output` holds the selected statement with the cursor position made
+ // relative to it, and `output_position` holds the offset of that statement within
+ // `input.Text`. When no statement contains the cursor, the whole input is returned
+ // and `output_position` is 0, so that the offset always matches `output`.
+ // Returns false (leaving the outputs untouched) if the query cannot be split.
+ bool GetStatement(
+     ILexer::TPtr& lexer,
+     TCompletionInput input,
+     TCompletionInput& output,
+     size_t& output_position) {
+     TVector<TString> statements;
+     NYql::TIssues issues;
+     if (!NSQLTranslationV1::SplitQueryToStatements(
+             TString(input.Text) + ";", lexer,
+             statements, issues, /* file = */ "",
+             /* areBlankSkipped = */ false)) {
+         return false;
+     }
+
+     size_t offset = 0;
+     for (const auto& statement : statements) {
+         if (input.CursorPosition < offset + statement.size()) {
+             output = {
+                 .Text = input.Text.SubStr(offset, statement.size()),
+                 .CursorPosition = input.CursorPosition - offset,
+             };
+             output_position = offset;
+             return true;
+         }
+         offset += statement.size();
+     }
+
+     // Fallback: the whole input is used, so it starts at offset 0
+     // (not at the accumulated length of all statements).
+     output = input;
+     output_position = 0;
+     return true;
+ }
+
+ // Tokenizes `input` and fills `context` with the tokens, their start offsets
+ // and the cursor location. Returns false if tokenization fails, in which case
+ // `context` is left unchanged.
+ bool GetCursorTokenContext(ILexer::TPtr& lexer, TCompletionInput input, TCursorTokenContext& context) {
+     TParsedTokenList tokens;
+     const bool isTokenized = Tokenize(lexer, input, tokens);
+     if (!isTokenized) {
+         return false;
+     }
+
+     // Derive everything that reads `tokens` before the list is moved away.
+     TVector<size_t> positions = GetTokenPositions(tokens);
+     const TCursor cursor = GetCursor(tokens, input.CursorPosition);
+
+     context.Tokens = std::move(tokens);
+     context.TokenPositions = std::move(positions);
+     context.Cursor = cursor;
+     return true;
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h
new file mode 100644
index 00000000000..35d22231e35
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <yql/essentials/sql/v1/complete/core/input.h>
+#include <yql/essentials/sql/v1/complete/text/word.h>
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+#include <util/generic/maybe.h>
+
+namespace NSQLComplete {
+
+ using NSQLTranslation::ILexer;
+ using NSQLTranslation::TParsedToken;
+ using NSQLTranslation::TParsedTokenList;
+
+ struct TCursor { // Location of the cursor relative to the token list.
+ TMaybe<size_t> PrevTokenIndex = Nothing(); // Token that contains the cursor or ends at it; empty when there is none.
+ size_t NextTokenIndex = PrevTokenIndex ? *PrevTokenIndex : 0; // Equals PrevTokenIndex while the cursor is strictly inside that token, one past it when the cursor is at its end.
+ size_t Position = 0; // Cursor offset in the analyzed text.
+ };
+
+ struct TRichParsedToken { // A lexer token together with its place in the token list.
+ const TParsedToken* Base = nullptr; // Underlying token; TokenAt points it into TCursorTokenContext::Tokens.
+ size_t Index = 0; // Index of the token in the token list.
+ size_t Position = 0; // Start offset of the token (sum of the content sizes of the preceding tokens).
+
+ bool IsLiteral() const; // True when the token name denotes a string or numeric literal.
+ };
+
+ struct TCursorTokenContext { // Tokenized input plus the cursor location within it.
+ TParsedTokenList Tokens; // Tokens produced by the lexer for the input.
+ TVector<size_t> TokenPositions; // Start offset of each token, parallel to Tokens.
+ TCursor Cursor; // Where the cursor is relative to Tokens.
+
+ TMaybe<TRichParsedToken> Enclosing() const; // Token the cursor is inside of, or directly attached to at its end; empty otherwise.
+ TMaybe<TRichParsedToken> MatchCursorPrefix(const TVector<TStringBuf>& pattern) const; // Matches the token names right before the cursor against `pattern` (empty entries match any token); returns the first matched token.
+ };
+ // Selects the statement of `input` that contains the cursor: writes it to `output` with the cursor made relative to it and reports an offset through `output_position`. Returns false if the query cannot be split into statements.
+ bool GetStatement(
+ ILexer::TPtr& lexer,
+ TCompletionInput input,
+ TCompletionInput& output,
+ size_t& output_position);
+ // Tokenizes `input` and fills `context` with the tokens, their offsets and the cursor location. Returns false if tokenization fails.
+ bool GetCursorTokenContext(
+ ILexer::TPtr& lexer,
+ TCompletionInput input,
+ TCursorTokenContext& context);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp
new file mode 100644
index 00000000000..0e275cca3b8
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp
@@ -0,0 +1,50 @@
+#include "cursor_token_context.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+
+using namespace NSQLComplete;
+
+Y_UNIT_TEST_SUITE(CursorTokenContextTests) {
+ // Creates a non-ANSI lexer backed by the pure ANTLR4 lexer factory.
+ NSQLTranslation::ILexer::TPtr MakeLexer() {
+ NSQLTranslationV1::TLexers lexers;
+ lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+ return NSQLTranslationV1::MakeLexer(
+ lexers, /* ansi = */ false, /* antlr4 = */ true,
+ NSQLTranslationV1::ELexerFlavor::Pure);
+ }
+ // Builds the cursor token context for `input`; presumably `#` marks the cursor position (handled by SharpedInput, defined elsewhere - verify).
+ TCursorTokenContext Context(TString input) {
+ auto lexer = MakeLexer();
+ TCursorTokenContext context;
+ UNIT_ASSERT(GetCursorTokenContext(lexer, SharpedInput(input), context));
+ return context;
+ }
+ // Empty input: no previous token, cursor at 0, nothing encloses it.
+ Y_UNIT_TEST(Empty) {
+ auto context = Context("");
+ UNIT_ASSERT(context.Cursor.PrevTokenIndex.Empty());
+ UNIT_ASSERT_VALUES_EQUAL(context.Cursor.NextTokenIndex, 0);
+ UNIT_ASSERT_VALUES_EQUAL(context.Cursor.Position, 0);
+ UNIT_ASSERT(context.Enclosing().Empty());
+ }
+ // A cursor surrounded only by whitespace has no enclosing token.
+ Y_UNIT_TEST(Blank) {
+ UNIT_ASSERT(Context("# ").Enclosing().Empty());
+ UNIT_ASSERT(Context(" #").Enclosing().Empty());
+ UNIT_ASSERT(Context(" # ").Enclosing().Empty());
+ }
+ // Expectations for cursors next to plain words, backtick-quoted identifiers and string literals.
+ Y_UNIT_TEST(Enclosing) {
+ UNIT_ASSERT(Context("se#").Enclosing().Defined());
+ UNIT_ASSERT(Context("#se").Enclosing().Empty());
+ UNIT_ASSERT(Context("`se`#").Enclosing().Empty());
+ UNIT_ASSERT(Context("#`se`").Enclosing().Empty());
+ UNIT_ASSERT(Context("`se`#`se`").Enclosing().Defined());
+ UNIT_ASSERT(Context("\"se\"#\"se\"").Enclosing().Empty());
+ }
+
+} // Y_UNIT_TEST_SUITE(CursorTokenContextTests)
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp
index 1c9f146c923..43c36aea9dd 100644
--- a/yql/essentials/sql/v1/complete/syntax/format.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/format.cpp
@@ -35,4 +35,17 @@ namespace NSQLComplete {
return text;
}
+ // Wraps `content` in backticks.
+ TString Quoted(TString content) {
+     TString quoted;
+     quoted.reserve(content.size() + 2);
+     quoted.append('`');
+     quoted.append(content);
+     quoted.append('`');
+     return quoted;
+ }
+
+ // Strips the surrounding backticks from `content`.
+ // The input must be at least two characters long and both start and end with a backtick.
+ TString Unquoted(TString content) {
+     Y_ENSURE(2 <= content.size() && content.front() == '`' && content.back() == '`');
+     // Keep everything between the first and the last character.
+     return content.substr(1, content.size() - 2);
+ }
+
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h
index 6c2f1b72ac2..58e5d1f1e4a 100644
--- a/yql/essentials/sql/v1/complete/syntax/format.h
+++ b/yql/essentials/sql/v1/complete/syntax/format.h
@@ -6,5 +6,7 @@
namespace NSQLComplete {
TString FormatKeywords(const TVector<TString>& seq);
+ TString Quoted(TString content); // Wraps `content` in backticks.
+ TString Unquoted(TString content); // Strips the surrounding backticks; fails a Y_ENSURE check if `content` is not backtick-quoted.
} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
index 252deaf682c..c080fae5ae4 100644
--- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
@@ -7,31 +7,31 @@ namespace NSQLComplete {
class TSqlGrammar: public ISqlGrammar {
public:
TSqlGrammar(const NSQLReflect::TLexerGrammar& grammar)
- : Parser(MakeDummyParser())
- , AllTokens(ComputeAllTokens())
- , KeywordTokens(ComputeKeywordTokens(grammar))
- , PunctuationTokens(ComputePunctuationTokens(grammar))
+ : Parser_(MakeDummyParser())
+ , AllTokens_(ComputeAllTokens())
+ , KeywordTokens_(ComputeKeywordTokens(grammar))
+ , PunctuationTokens_(ComputePunctuationTokens(grammar))
{
}
const antlr4::dfa::Vocabulary& GetVocabulary() const override {
- return Parser->getVocabulary();
+ return Parser_->getVocabulary();
}
const std::unordered_set<TTokenId>& GetAllTokens() const override {
- return AllTokens;
+ return AllTokens_;
}
const std::unordered_set<TTokenId>& GetKeywordTokens() const override {
- return KeywordTokens;
+ return KeywordTokens_;
}
const std::unordered_set<TTokenId>& GetPunctuationTokens() const override {
- return PunctuationTokens;
+ return PunctuationTokens_;
}
const std::string& SymbolizedRule(TRuleId rule) const override {
- return Parser->getRuleNames().at(rule);
+ return Parser_->getRuleNames().at(rule);
}
private:
@@ -76,10 +76,10 @@ namespace NSQLComplete {
return punctuationTokens;
}
- const THolder<antlr4::Parser> Parser;
- const std::unordered_set<TTokenId> AllTokens;
- const std::unordered_set<TTokenId> KeywordTokens;
- const std::unordered_set<TTokenId> PunctuationTokens;
+ const THolder<antlr4::Parser> Parser_;
+ const std::unordered_set<TTokenId> AllTokens_;
+ const std::unordered_set<TTokenId> KeywordTokens_;
+ const std::unordered_set<TTokenId> PunctuationTokens_;
};
const ISqlGrammar& GetSqlGrammar() {
diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp
index c434fa28daf..549208d4cab 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/local.cpp
@@ -1,9 +1,10 @@
#include "local.h"
#include "ansi.h"
+#include "cursor_token_context.h"
+#include "format.h"
#include "grammar.h"
#include "parser_call_stack.h"
-#include "token.h"
#include <yql/essentials/sql/v1/complete/antlr4/c3i.h>
#include <yql/essentials/sql/v1/complete/antlr4/c3t.h>
@@ -49,65 +50,77 @@ namespace NSQLComplete {
public:
explicit TSpecializedLocalSyntaxAnalysis(TLexerSupplier lexer)
- : Grammar(&GetSqlGrammar())
+ : Grammar_(&GetSqlGrammar())
, Lexer_(lexer(/* ansi = */ IsAnsiLexer))
- , C3(ComputeC3Config())
+ , C3_(ComputeC3Config())
{
}
TLocalSyntaxContext Analyze(TCompletionInput input) override {
TCompletionInput statement;
- if (!GetStatement(Lexer_, input, statement)) {
+ size_t statement_position;
+ if (!GetStatement(Lexer_, input, statement, statement_position)) {
return {};
}
- auto candidates = C3.Complete(statement);
-
- TParsedTokenList tokens;
- TCaretTokenPosition caret;
- if (!TokenizePrefix(statement, tokens, caret)) {
+ TCursorTokenContext context;
+ if (!GetCursorTokenContext(Lexer_, statement, context)) {
return {};
}
- if (IsCaretEnslosed(tokens, caret)) {
- return {};
+ TC3Candidates candidates = C3_.Complete(statement);
+
+ TLocalSyntaxContext result;
+
+ result.EditRange = EditRange(context);
+ result.EditRange.Begin += statement_position;
+
+ if (auto enclosing = context.Enclosing()) {
+ if (enclosing->IsLiteral()) {
+ return result;
+ } else if (enclosing->Base->Name == "ID_QUOTED") {
+ result.Object = ObjectMatch(context, candidates);
+ return result;
+ }
}
- return {
- .Keywords = SiftedKeywords(candidates),
- .Pragma = PragmaMatch(tokens, candidates),
- .IsTypeName = IsTypeNameMatched(candidates),
- .Function = FunctionMatch(tokens, candidates),
- .Hint = HintMatch(candidates),
- };
+ result.Keywords = SiftedKeywords(candidates);
+ result.Pragma = PragmaMatch(context, candidates);
+ result.Type = TypeMatch(candidates);
+ result.Function = FunctionMatch(context, candidates);
+ result.Hint = HintMatch(candidates);
+ result.Object = ObjectMatch(context, candidates);
+ result.Cluster = ClusterMatch(context, candidates);
+
+ return result;
}
private:
- IC3Engine::TConfig ComputeC3Config() {
+ IC3Engine::TConfig ComputeC3Config() const {
return {
.IgnoredTokens = ComputeIgnoredTokens(),
.PreferredRules = ComputePreferredRules(),
};
}
- std::unordered_set<TTokenId> ComputeIgnoredTokens() {
- auto ignoredTokens = Grammar->GetAllTokens();
- for (auto keywordToken : Grammar->GetKeywordTokens()) {
+ std::unordered_set<TTokenId> ComputeIgnoredTokens() const {
+ auto ignoredTokens = Grammar_->GetAllTokens();
+ for (auto keywordToken : Grammar_->GetKeywordTokens()) {
ignoredTokens.erase(keywordToken);
}
- for (auto punctuationToken : Grammar->GetPunctuationTokens()) {
+ for (auto punctuationToken : Grammar_->GetPunctuationTokens()) {
ignoredTokens.erase(punctuationToken);
}
return ignoredTokens;
}
- std::unordered_set<TRuleId> ComputePreferredRules() {
+ std::unordered_set<TRuleId> ComputePreferredRules() const {
return GetC3PreferredRules();
}
- TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) {
- const auto& vocabulary = Grammar->GetVocabulary();
- const auto& keywordTokens = Grammar->GetKeywordTokens();
+ TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) const {
+ const auto& vocabulary = Grammar_->GetVocabulary();
+ const auto& keywordTokens = Grammar_->GetKeywordTokens();
TLocalSyntaxContext::TKeywords keywords;
for (const auto& token : candidates.Tokens) {
@@ -122,40 +135,41 @@ namespace NSQLComplete {
}
TMaybe<TLocalSyntaxContext::TPragma> PragmaMatch(
- const TParsedTokenList& tokens, const TC3Candidates& candidates) {
+ const TCursorTokenContext& context, const TC3Candidates& candidates) const {
if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) {
return Nothing();
}
TLocalSyntaxContext::TPragma pragma;
- if (EndsWith(tokens, {"ID_PLAIN", "DOT"})) {
- pragma.Namespace = tokens[tokens.size() - 2].Content;
- } else if (EndsWith(tokens, {"ID_PLAIN", "DOT", ""})) {
- pragma.Namespace = tokens[tokens.size() - 3].Content;
+
+ if (TMaybe<TRichParsedToken> begin;
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) ||
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) {
+ pragma.Namespace = begin->Base->Content;
}
return pragma;
}
- bool IsTypeNameMatched(const TC3Candidates& candidates) {
+ bool TypeMatch(const TC3Candidates& candidates) const {
return AnyOf(candidates.Rules, RuleAdapted(IsLikelyTypeStack));
}
TMaybe<TLocalSyntaxContext::TFunction> FunctionMatch(
- const TParsedTokenList& tokens, const TC3Candidates& candidates) {
+ const TCursorTokenContext& context, const TC3Candidates& candidates) const {
if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyFunctionStack))) {
return Nothing();
}
TLocalSyntaxContext::TFunction function;
- if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE"})) {
- function.Namespace = tokens[tokens.size() - 2].Content;
- } else if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE", ""})) {
- function.Namespace = tokens[tokens.size() - 3].Content;
+ if (TMaybe<TRichParsedToken> begin;
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE"})) ||
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE", ""}))) {
+ function.Namespace = begin->Base->Content;
}
return function;
}
- TMaybe<TLocalSyntaxContext::THint> HintMatch(const TC3Candidates& candidates) {
+ TMaybe<TLocalSyntaxContext::THint> HintMatch(const TC3Candidates& candidates) const {
// TODO(YQL-19747): detect local contexts with a single iteration through the candidates.Rules
auto rule = FindIf(candidates.Rules, RuleAdapted(IsLikelyHintStack));
if (rule == std::end(candidates.Rules)) {
@@ -172,45 +186,103 @@ namespace NSQLComplete {
};
}
- bool TokenizePrefix(TCompletionInput input, TParsedTokenList& tokens, TCaretTokenPosition& caret) {
- NYql::TIssues issues;
- if (!NSQLTranslation::Tokenize(
- *Lexer_, TString(input.Text), /* queryName = */ "",
- tokens, issues, /* maxErrors = */ 1)) {
- return false;
+ TMaybe<TLocalSyntaxContext::TObject> ObjectMatch(
+ const TCursorTokenContext& context, const TC3Candidates& candidates) const {
+ TLocalSyntaxContext::TObject object;
+
+ if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyObjectRefStack))) {
+ object.Kinds.emplace(EObjectKind::Folder);
+ }
+
+ if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyExistingTableStack))) {
+ object.Kinds.emplace(EObjectKind::Folder);
+ object.Kinds.emplace(EObjectKind::Table);
+ }
+
+ if (object.Kinds.empty()) {
+ return Nothing();
}
- Y_ENSURE(!tokens.empty() && tokens.back().Name == "EOF");
- tokens.pop_back();
+ if (TMaybe<TRichParsedToken> begin;
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) ||
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) {
+ object.Cluster = begin->Base->Content;
+ }
- caret = CaretTokenPosition(tokens, input.CursorPosition);
- tokens.crop(caret.NextTokenIndex + 1);
- return true;
+ if (TMaybe<TRichParsedToken> begin;
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT"})) ||
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT", ""}))) {
+ object.Provider = begin->Base->Content;
+ }
+
+ if (auto path = ObjectPath(context)) {
+ object.Path = *path;
+ object.IsEnclosed = true;
+ }
+
+ return object;
+ }
+
+ // Returns the part of the enclosing token's content that lies before the cursor,
+ // or Nothing() when no token encloses the cursor.
+ // For a backtick-quoted identifier the quotes are stripped first, so the opening
+ // backtick is excluded from the prefix length; an unquoted token has no such
+ // character to skip and keeps its whole typed prefix.
+ TMaybe<TString> ObjectPath(const TCursorTokenContext& context) const {
+     auto enclosing = context.Enclosing();
+     if (!enclosing) {
+         return Nothing();
+     }
+
+     TString path = enclosing->Base->Content;
+     size_t length = context.Cursor.Position - enclosing->Position;
+     if (enclosing->Base->Name == "ID_QUOTED") {
+         path = Unquoted(std::move(path));
+         length -= 1; // the opening backtick precedes the content
+     }
+
+     // Only ever shrink: resizing upwards would pad the path with filler characters.
+     if (length < path.size()) {
+         path.resize(length);
+     }
+     return path;
+ }
- bool IsCaretEnslosed(const TParsedTokenList& tokens, TCaretTokenPosition caret) {
- if (tokens.empty() || caret.PrevTokenIndex != caret.NextTokenIndex) {
- return false;
+ TMaybe<TLocalSyntaxContext::TCluster> ClusterMatch(
+ const TCursorTokenContext& context, const TC3Candidates& candidates) const {
+ if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyClusterStack))) {
+ return Nothing();
}
- const auto& token = tokens.back();
- return token.Name == "STRING_VALUE" ||
- token.Name == "ID_QUOTED" ||
- token.Name == "DIGIGTS" ||
- token.Name == "INTEGER_VALUE" ||
- token.Name == "REAL";
+ TLocalSyntaxContext::TCluster cluster;
+ if (TMaybe<TRichParsedToken> begin;
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON"})) ||
+ (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", ""}))) {
+ cluster.Provider = begin->Base->Content;
+ }
+ return cluster;
+ }
+
+ // Computes the text range a completion should replace.
+ // With an enclosing token the range is derived from that token; otherwise it is
+ // an empty range at the cursor position.
+ TEditRange EditRange(const TCursorTokenContext& context) const {
+     const auto enclosing = context.Enclosing();
+     if (enclosing.Empty()) {
+         TEditRange caret;
+         caret.Begin = context.Cursor.Position;
+         caret.Length = 0;
+         return caret;
+     }
+
+     return EditRange(*enclosing, context.Cursor);
+ }
+
+ // Range from the start of `token` up to the cursor.
+ // For a NOT_EQUALS2 token the range starts one character later.
+ TEditRange EditRange(const TRichParsedToken& token, const TCursor& cursor) const {
+     const size_t skipped = (token.Base->Name == "NOT_EQUALS2") ? 1 : 0;
+     const size_t begin = token.Position + skipped;
+
+     TEditRange range;
+     range.Begin = begin;
+     range.Length = cursor.Position - begin;
+     return range;
+ }
- const ISqlGrammar* Grammar;
+ const ISqlGrammar* Grammar_;
NSQLTranslation::ILexer::TPtr Lexer_;
- TC3Engine<G> C3;
+ TC3Engine<G> C3_;
};
class TLocalSyntaxAnalysis: public ILocalSyntaxAnalysis {
public:
explicit TLocalSyntaxAnalysis(TLexerSupplier lexer)
- : DefaultEngine(lexer)
- , AnsiEngine(lexer)
+ : DefaultEngine_(lexer)
+ , AnsiEngine_(lexer)
{
}
@@ -223,13 +295,13 @@ namespace NSQLComplete {
private:
ILocalSyntaxAnalysis& GetSpecializedEngine(bool isAnsiLexer) {
if (isAnsiLexer) {
- return AnsiEngine;
+ return AnsiEngine_;
}
- return DefaultEngine;
+ return DefaultEngine_;
}
- TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ false> DefaultEngine;
- TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ true> AnsiEngine;
+ TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ false> DefaultEngine_;
+ TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ true> AnsiEngine_;
};
ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis(TLexerSupplier lexer) {
diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h
index d58b62c62cd..8f88d5aa71c 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.h
+++ b/yql/essentials/sql/v1/complete/syntax/local.h
@@ -1,15 +1,22 @@
#pragma once
+#include <yql/essentials/sql/v1/complete/core/name.h>
#include <yql/essentials/sql/v1/complete/sql_complete.h>
#include <yql/essentials/sql/v1/lexer/lexer.h>
#include <util/generic/string.h>
#include <util/generic/hash.h>
+#include <util/generic/hash_set.h>
#include <util/generic/maybe.h>
namespace NSQLComplete {
+ struct TEditRange { // Span of the input text that a completion replaces.
+ size_t Begin = 0; // Offset of the first character of the span.
+ size_t Length = 0; // Number of characters in the span.
+ };
+
struct TLocalSyntaxContext {
using TKeywords = THashMap<TString, TVector<TString>>;
@@ -25,11 +32,26 @@ namespace NSQLComplete {
EStatementKind StatementKind;
};
+ struct TCluster { // Context for completing a cluster name.
+ TString Provider; // Content of the identifier found before a colon ahead of the cursor; empty if absent.
+ };
+
+ struct TObject { // Context for completing an object (folder or table) name.
+ TString Provider; // Identifier before the colon in a `provider:cluster.` prefix; empty if absent.
+ TString Cluster; // Identifier before the dot in a `cluster.` prefix; empty if absent.
+ TString Path; // Part of the enclosing token that precedes the cursor.
+ THashSet<EObjectKind> Kinds; // Object kinds acceptable at the cursor.
+ bool IsEnclosed = false; // Set when the cursor has an enclosing token from which Path was taken.
+ };
+
TKeywords Keywords;
TMaybe<TPragma> Pragma;
- bool IsTypeName = false;
+ bool Type = false;
TMaybe<TFunction> Function;
TMaybe<THint> Hint;
+ TMaybe<TObject> Object;
+ TMaybe<TCluster> Cluster;
+ TEditRange EditRange;
};
class ILocalSyntaxAnalysis {
diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp
index 938483438b1..ce6c94306d4 100644
--- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp
@@ -13,7 +13,7 @@
namespace NSQLComplete {
- const TVector<TRuleId> KeywordRules = {
+ const TVector<TRuleId> PreferredRules = {
RULE(Keyword),
RULE(Keyword_expr_uncompat),
RULE(Keyword_table_uncompat),
@@ -24,27 +24,13 @@ namespace NSQLComplete {
RULE(Keyword_hint_uncompat),
RULE(Keyword_as_compat),
RULE(Keyword_compat),
- };
-
- const TVector<TRuleId> PragmaNameRules = {
- RULE(Opt_id_prefix_or_type),
- RULE(An_id),
- };
-
- const TVector<TRuleId> TypeNameRules = {
- RULE(Type_name_simple),
RULE(An_id_or_type),
- };
-
- const TVector<TRuleId> FunctionNameRules = {
+ RULE(An_id),
RULE(Id_expr),
- RULE(An_id_or_type),
RULE(Id_or_type),
- };
-
- const TVector<TRuleId> HintNameRules = {
RULE(Id_hint),
- RULE(An_id),
+ RULE(Opt_id_prefix_or_type),
+ RULE(Type_name_simple),
};
TVector<std::string> Symbolized(const TParserCallStack& stack) {
@@ -101,6 +87,26 @@ namespace NSQLComplete {
Contains({RULE(External_call_param), RULE(An_id)}, stack);
}
+ // Heuristic: the parser call stack matches the Object_ref rule.
+ bool IsLikelyObjectRefStack(const TParserCallStack& stack) {
+     const bool isObjectRef = Contains({RULE(Object_ref)}, stack);
+     return isObjectRef;
+ }
+
+ // Heuristic: the parser call stack refers to a table that is expected to exist.
+ // Stacks matching Create_table_stmt -> Simple_table_ref are rejected first; after
+ // that either of the two table-reference rule sequences is accepted.
+ bool IsLikelyExistingTableStack(const TParserCallStack& stack) {
+     if (Contains({RULE(Create_table_stmt),
+                   RULE(Simple_table_ref)}, stack)) {
+         return false;
+     }
+
+     if (Contains({RULE(Simple_table_ref),
+                   RULE(Simple_table_ref_core),
+                   RULE(Object_ref)}, stack)) {
+         return true;
+     }
+
+     return Contains({RULE(Single_source),
+                      RULE(Table_ref),
+                      RULE(Table_key),
+                      RULE(Id_table_or_type)}, stack);
+ }
+
+ // Heuristic: the parser call stack matches the Cluster_expr rule.
+ bool IsLikelyClusterStack(const TParserCallStack& stack) {
+     const bool isClusterExpr = Contains({RULE(Cluster_expr)}, stack);
+     return isClusterExpr;
+ }
+
TMaybe<EStatementKind> StatementKindOf(const TParserCallStack& stack) {
for (TRuleId rule : std::ranges::views::reverse(stack)) {
if (rule == RULE(Process_core) || rule == RULE(Reduce_core) || rule == RULE(Select_core)) {
@@ -115,10 +121,7 @@ namespace NSQLComplete {
std::unordered_set<TRuleId> GetC3PreferredRules() {
std::unordered_set<TRuleId> preferredRules;
- preferredRules.insert(std::begin(KeywordRules), std::end(KeywordRules));
- preferredRules.insert(std::begin(PragmaNameRules), std::end(PragmaNameRules));
- preferredRules.insert(std::begin(TypeNameRules), std::end(TypeNameRules));
- preferredRules.insert(std::begin(FunctionNameRules), std::end(FunctionNameRules));
+ preferredRules.insert(std::begin(PreferredRules), std::end(PreferredRules));
return preferredRules;
}
diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h
index d185b72d628..d44b824a05e 100644
--- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h
+++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h
@@ -15,6 +15,12 @@ namespace NSQLComplete {
bool IsLikelyHintStack(const TParserCallStack& stack);
+ bool IsLikelyObjectRefStack(const TParserCallStack& stack); // Heuristic: the stack matches the Object_ref rule.
+
+ bool IsLikelyExistingTableStack(const TParserCallStack& stack); // Heuristic: the stack refers to a table outside of a CREATE TABLE target.
+
+ bool IsLikelyClusterStack(const TParserCallStack& stack); // Heuristic: the stack matches the Cluster_expr rule.
+
TMaybe<EStatementKind> StatementKindOf(const TParserCallStack& stack);
std::unordered_set<TRuleId> GetC3PreferredRules();
diff --git a/yql/essentials/sql/v1/complete/syntax/token.cpp b/yql/essentials/sql/v1/complete/syntax/token.cpp
deleted file mode 100644
index b8aee3211c6..00000000000
--- a/yql/essentials/sql/v1/complete/syntax/token.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-#include "token.h"
-
-#include <yql/essentials/core/issue/yql_issue.h>
-#include <yql/essentials/sql/v1/lexer/lexer.h>
-
-namespace NSQLComplete {
-
- bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output) {
- TVector<TString> statements;
- NYql::TIssues issues;
- if (!NSQLTranslationV1::SplitQueryToStatements(
- TString(input.Text) + ";", lexer,
- statements, issues, /* file = */ "",
- /* areBlankSkipped = */ false)) {
- return false;
- }
-
- size_t cursor = 0;
- for (const auto& statement : statements) {
- if (input.CursorPosition < cursor + statement.size()) {
- output = {
- .Text = input.Text.SubStr(cursor, statement.size()),
- .CursorPosition = input.CursorPosition - cursor,
- };
- return true;
- }
- cursor += statement.size();
- }
-
- output = input;
- return true;
- }
-
- TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition) {
- size_t cursor = 0;
- for (size_t i = 0; i < tokens.size(); ++i) {
- const auto& content = tokens[i].Content;
- cursor += content.size();
- if (cursorPosition < cursor) {
- return {i, i};
- } else if (cursorPosition == cursor && IsWordBoundary(content.back())) {
- return {i, i + 1};
- }
- }
- return {std::max(tokens.size(), static_cast<size_t>(1)) - 1, tokens.size()};
- }
-
- bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern) {
- if (tokens.size() < pattern.size()) {
- return false;
- }
- for (yssize_t i = tokens.ysize() - 1, j = pattern.ysize() - 1; 0 <= j; --i, --j) {
- if (!pattern[j].empty() && tokens[i].Name != pattern[j]) {
- return false;
- }
- }
- return true;
- }
-
-} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/token.h b/yql/essentials/sql/v1/complete/syntax/token.h
deleted file mode 100644
index d1e215285a9..00000000000
--- a/yql/essentials/sql/v1/complete/syntax/token.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#pragma once
-
-#include <yql/essentials/sql/v1/complete/core/input.h>
-#include <yql/essentials/sql/v1/complete/text/word.h>
-
-#include <yql/essentials/parser/lexer_common/lexer.h>
-
-namespace NSQLComplete {
-
- using NSQLTranslation::TParsedTokenList;
-
- // `PrevTokenIndex` = `NextTokenIndex`, iff caret is enclosed
- struct TCaretTokenPosition {
- size_t PrevTokenIndex;
- size_t NextTokenIndex;
- };
-
- bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output);
-
- TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition);
-
- bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern);
-
-} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/ut/ya.make b/yql/essentials/sql/v1/complete/syntax/ut/ya.make
index e070185af9f..7e682c5bac0 100644
--- a/yql/essentials/sql/v1/complete/syntax/ut/ya.make
+++ b/yql/essentials/sql/v1/complete/syntax/ut/ya.make
@@ -2,6 +2,11 @@ UNITTEST_FOR(yql/essentials/sql/v1/complete/syntax)
SRCS(
grammar_ut.cpp
+ cursor_token_context_ut.cpp
+)
+
+PEERDIR(
+ yql/essentials/sql/v1/lexer/antlr4_pure
)
END()
diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make
index 9e2e908454b..7f63e5b2374 100644
--- a/yql/essentials/sql/v1/complete/syntax/ya.make
+++ b/yql/essentials/sql/v1/complete/syntax/ya.make
@@ -2,11 +2,11 @@ LIBRARY()
SRCS(
ansi.cpp
+ cursor_token_context.cpp
format.cpp
grammar.cpp
local.cpp
parser_call_stack.cpp
- token.cpp
)
ADDINCL(
@@ -21,6 +21,8 @@ PEERDIR(
yql/essentials/sql/settings
yql/essentials/sql/v1/lexer
yql/essentials/sql/v1/reflect
+ yql/essentials/sql/v1/complete/core
+ yql/essentials/sql/v1/complete/text
)
END()