diff options
author | vityaman <vityaman.dev@yandex.ru> | 2025-05-06 15:49:02 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-05-06 16:04:08 +0300 |
commit | 9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28 (patch) | |
tree | 561c599fae4ea29b537a6958b65e1b052548edf2 /yql/essentials/sql/v1/complete/syntax | |
parent | c131e959456f9f9a4adada5623ce3bae4097a8c1 (diff) | |
download | ydb-9c3fdca51d8ae892c5ad8f6ef92df73fafc09e28.tar.gz |
YQL-19747 Complete folder, table and cluster names
---
- Related to `YQL-19747`
- On top of https://github.com/ytsaurus/ytsaurus/pull/1253
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/14
- Related to https://github.com/vityaman/ydb/issues/35
- Related to https://github.com/vityaman/ydb/issues/40
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1257
commit_hash:0b842abb27184c88b8177beeea29fb1ea86b7a04
Diffstat (limited to 'yql/essentials/sql/v1/complete/syntax')
14 files changed, 489 insertions, 188 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp new file mode 100644 index 00000000000..33aef36847a --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.cpp @@ -0,0 +1,160 @@ +#include "cursor_token_context.h" + +#include <yql/essentials/core/issue/yql_issue.h> +#include <yql/essentials/sql/v1/lexer/lexer.h> + +namespace NSQLComplete { + + namespace { + + bool Tokenize(ILexer::TPtr& lexer, TCompletionInput input, TParsedTokenList& tokens) { + NYql::TIssues issues; + if (!NSQLTranslation::Tokenize( + *lexer, TString(input.Text), /* queryName = */ "", + tokens, issues, /* maxErrors = */ 1)) { + return false; + } + return true; + } + + TCursor GetCursor(const TParsedTokenList& tokens, size_t cursorPosition) { + size_t current = 0; + for (size_t i = 0; i < tokens.size() && current < cursorPosition; ++i) { + const auto& content = tokens[i].Content; + + current += content.size(); + if (current < cursorPosition) { + continue; + } + + TCursor cursor = { + .PrevTokenIndex = i, + .NextTokenIndex = i, + .Position = cursorPosition, + }; + + if (current == cursorPosition) { + cursor.NextTokenIndex += 1; + } + + return cursor; + } + + return { + .PrevTokenIndex = Nothing(), + .NextTokenIndex = 0, + .Position = cursorPosition, + }; + } + + TVector<size_t> GetTokenPositions(const TParsedTokenList& tokens) { + TVector<size_t> positions; + positions.reserve(tokens.size()); + size_t pos = 0; + for (const auto& token : tokens) { + positions.emplace_back(pos); + pos += token.Content.size(); + } + return positions; + } + + } // namespace + + bool TRichParsedToken::IsLiteral() const { + return Base->Name == "STRING_VALUE" || + Base->Name == "DIGIGTS" || + Base->Name == "INTEGER_VALUE" || + Base->Name == "REAL"; + } + + TRichParsedToken TokenAt(const TCursorTokenContext& context, size_t index) { + return { + .Base = &context.Tokens.at(index), + .Index = index, + .Position = context.TokenPositions.at(index), + }; + } + + TMaybe<TRichParsedToken> TCursorTokenContext::Enclosing() const { + if (Tokens.size() == 1) { + Y_ENSURE(Tokens[0].Name == "EOF"); + return Nothing(); + } + + if (Cursor.PrevTokenIndex.Empty()) { + return Nothing(); + } + + auto token = TokenAt(*this, *Cursor.PrevTokenIndex); + if (Cursor.PrevTokenIndex == Cursor.NextTokenIndex || + !IsWordBoundary(token.Base->Content.back())) { + return token; + } + + return Nothing(); + } + + TMaybe<TRichParsedToken> TCursorTokenContext::MatchCursorPrefix(const TVector<TStringBuf>& pattern) const { + const auto prefix = std::span{Tokens.begin(), Cursor.NextTokenIndex}; + if (prefix.size() < pattern.size()) { + return Nothing(); + } + + ssize_t i = static_cast<ssize_t>(prefix.size()) - 1; + ssize_t j = static_cast<ssize_t>(pattern.size()) - 1; + for (; 0 <= j; --i, --j) { + if (!pattern[j].empty() && prefix[i].Name != pattern[j]) { + return Nothing(); + } + } + return TokenAt(*this, prefix.size() - pattern.size()); + } + + bool GetStatement( + ILexer::TPtr& lexer, + TCompletionInput input, + TCompletionInput& output, + size_t& output_position) { + TVector<TString> statements; + NYql::TIssues issues; + if (!NSQLTranslationV1::SplitQueryToStatements( + TString(input.Text) + ";", lexer, + statements, issues, /* file = */ "", + /* areBlankSkipped = */ false)) { + return false; + } + + size_t& cursor = output_position; + cursor = 0; + for (const auto& statement : statements) { + if (input.CursorPosition < cursor + statement.size()) { + output = { + .Text = input.Text.SubStr(cursor, statement.size()), + .CursorPosition = input.CursorPosition - cursor, + }; + return true; + } + cursor += statement.size(); + } + + output = input; + return true; + } + + bool GetCursorTokenContext(ILexer::TPtr& lexer, TCompletionInput input, TCursorTokenContext& context) { + TParsedTokenList tokens; + if (!Tokenize(lexer, input, tokens)) { + return false; + } + + TVector<size_t> positions = GetTokenPositions(tokens); + TCursor cursor = GetCursor(tokens, input.CursorPosition); + context = { + .Tokens = std::move(tokens), + .TokenPositions = std::move(positions), + .Cursor = cursor, + }; + return true; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h new file mode 100644 index 00000000000..35d22231e35 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context.h @@ -0,0 +1,50 @@ +#pragma once + +#include <yql/essentials/sql/v1/complete/core/input.h> +#include <yql/essentials/sql/v1/complete/text/word.h> + +#include <yql/essentials/parser/lexer_common/lexer.h> + +#include <util/generic/maybe.h> + +namespace NSQLComplete { + + using NSQLTranslation::ILexer; + using NSQLTranslation::TParsedToken; + using NSQLTranslation::TParsedTokenList; + + struct TCursor { + TMaybe<size_t> PrevTokenIndex = Nothing(); + size_t NextTokenIndex = PrevTokenIndex ? *PrevTokenIndex : 0; + size_t Position = 0; + }; + + struct TRichParsedToken { + const TParsedToken* Base = nullptr; + size_t Index = 0; + size_t Position = 0; + + bool IsLiteral() const; + }; + + struct TCursorTokenContext { + TParsedTokenList Tokens; + TVector<size_t> TokenPositions; + TCursor Cursor; + + TMaybe<TRichParsedToken> Enclosing() const; + TMaybe<TRichParsedToken> MatchCursorPrefix(const TVector<TStringBuf>& pattern) const; + }; + + bool GetStatement( + ILexer::TPtr& lexer, + TCompletionInput input, + TCompletionInput& output, + size_t& output_position); + + bool GetCursorTokenContext( + ILexer::TPtr& lexer, + TCompletionInput input, + TCursorTokenContext& context); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp new file mode 100644 index 00000000000..0e275cca3b8 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/cursor_token_context_ut.cpp @@ -0,0 +1,50 @@ +#include "cursor_token_context.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> +#include <yql/essentials/sql/v1/lexer/lexer.h> + +using namespace NSQLComplete; + +Y_UNIT_TEST_SUITE(CursorTokenContextTests) { + + NSQLTranslation::ILexer::TPtr MakeLexer() { + NSQLTranslationV1::TLexers lexers; + lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory(); + return NSQLTranslationV1::MakeLexer( + lexers, /* ansi = */ false, /* antlr4 = */ true, + NSQLTranslationV1::ELexerFlavor::Pure); + } + + TCursorTokenContext Context(TString input) { + auto lexer = MakeLexer(); + TCursorTokenContext context; + UNIT_ASSERT(GetCursorTokenContext(lexer, SharpedInput(input), context)); + return context; + } + + Y_UNIT_TEST(Empty) { + auto context = Context(""); + UNIT_ASSERT(context.Cursor.PrevTokenIndex.Empty()); + UNIT_ASSERT_VALUES_EQUAL(context.Cursor.NextTokenIndex, 0); + UNIT_ASSERT_VALUES_EQUAL(context.Cursor.Position, 0); + UNIT_ASSERT(context.Enclosing().Empty()); + } + + Y_UNIT_TEST(Blank) { + UNIT_ASSERT(Context("# ").Enclosing().Empty()); + UNIT_ASSERT(Context(" #").Enclosing().Empty()); + UNIT_ASSERT(Context(" # ").Enclosing().Empty()); + } + + Y_UNIT_TEST(Enclosing) { + UNIT_ASSERT(Context("se#").Enclosing().Defined()); + UNIT_ASSERT(Context("#se").Enclosing().Empty()); + UNIT_ASSERT(Context("`se`#").Enclosing().Empty()); + UNIT_ASSERT(Context("#`se`").Enclosing().Empty()); + UNIT_ASSERT(Context("`se`#`se`").Enclosing().Defined()); + UNIT_ASSERT(Context("\"se\"#\"se\"").Enclosing().Empty()); + } + +} // Y_UNIT_TEST_SUITE(CursorTokenContextTests) diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp index 1c9f146c923..43c36aea9dd 100644 --- a/yql/essentials/sql/v1/complete/syntax/format.cpp +++ b/yql/essentials/sql/v1/complete/syntax/format.cpp @@ -35,4 +35,17 @@ namespace NSQLComplete { return text; } + TString Quoted(TString content) { + content.prepend('`'); + content.append('`'); + return content; + } + + TString Unquoted(TString content) { + Y_ENSURE(2 <= content.size() && content.front() == '`' && content.back() == '`'); + content.erase(0, 1); + content.pop_back(); + return content; + } + } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h index 6c2f1b72ac2..58e5d1f1e4a 100644 --- a/yql/essentials/sql/v1/complete/syntax/format.h +++ b/yql/essentials/sql/v1/complete/syntax/format.h @@ -6,5 +6,7 @@ namespace NSQLComplete { TString FormatKeywords(const TVector<TString>& seq); + TString Quoted(TString content); + TString Unquoted(TString content); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp index 252deaf682c..c080fae5ae4 100644 --- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp +++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp @@ -7,31 +7,31 @@ namespace NSQLComplete { class TSqlGrammar: public ISqlGrammar { public: TSqlGrammar(const NSQLReflect::TLexerGrammar& grammar) - : Parser(MakeDummyParser()) - , AllTokens(ComputeAllTokens()) - , KeywordTokens(ComputeKeywordTokens(grammar)) - , PunctuationTokens(ComputePunctuationTokens(grammar)) + : Parser_(MakeDummyParser()) + , AllTokens_(ComputeAllTokens()) + , KeywordTokens_(ComputeKeywordTokens(grammar)) + , PunctuationTokens_(ComputePunctuationTokens(grammar)) { } const antlr4::dfa::Vocabulary& GetVocabulary() const override { - return Parser->getVocabulary(); + return Parser_->getVocabulary(); } const std::unordered_set<TTokenId>& GetAllTokens() const override { - return AllTokens; + return AllTokens_; } const std::unordered_set<TTokenId>& GetKeywordTokens() const override { - return KeywordTokens; + return KeywordTokens_; } const std::unordered_set<TTokenId>& GetPunctuationTokens() const override { - return PunctuationTokens; + return PunctuationTokens_; } const std::string& SymbolizedRule(TRuleId rule) const override { - return Parser->getRuleNames().at(rule); + return Parser_->getRuleNames().at(rule); } private: @@ -76,10 +76,10 @@ namespace NSQLComplete { return punctuationTokens; } - const THolder<antlr4::Parser> Parser; - const std::unordered_set<TTokenId> AllTokens; - const std::unordered_set<TTokenId> KeywordTokens; - const std::unordered_set<TTokenId> PunctuationTokens; + const THolder<antlr4::Parser> Parser_; + const std::unordered_set<TTokenId> AllTokens_; + const std::unordered_set<TTokenId> KeywordTokens_; + const std::unordered_set<TTokenId> PunctuationTokens_; }; const ISqlGrammar& GetSqlGrammar() { diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp index c434fa28daf..549208d4cab 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.cpp +++ b/yql/essentials/sql/v1/complete/syntax/local.cpp @@ -1,9 +1,10 @@ #include "local.h" #include "ansi.h" +#include "cursor_token_context.h" +#include "format.h" #include "grammar.h" #include "parser_call_stack.h" -#include "token.h" #include <yql/essentials/sql/v1/complete/antlr4/c3i.h> #include <yql/essentials/sql/v1/complete/antlr4/c3t.h> @@ -49,65 +50,77 @@ namespace NSQLComplete { public: explicit TSpecializedLocalSyntaxAnalysis(TLexerSupplier lexer) - : Grammar(&GetSqlGrammar()) + : Grammar_(&GetSqlGrammar()) , Lexer_(lexer(/* ansi = */ IsAnsiLexer)) - , C3(ComputeC3Config()) + , C3_(ComputeC3Config()) { } TLocalSyntaxContext Analyze(TCompletionInput input) override { TCompletionInput statement; - if (!GetStatement(Lexer_, input, statement)) { + size_t statement_position; + if (!GetStatement(Lexer_, input, statement, statement_position)) { return {}; } - auto candidates = C3.Complete(statement); - - TParsedTokenList tokens; - TCaretTokenPosition caret; - if (!TokenizePrefix(statement, tokens, caret)) { + TCursorTokenContext context; + if (!GetCursorTokenContext(Lexer_, statement, context)) { return {}; } - if (IsCaretEnslosed(tokens, caret)) { - return {}; + TC3Candidates candidates = C3_.Complete(statement); + + TLocalSyntaxContext result; + + result.EditRange = EditRange(context); + result.EditRange.Begin += statement_position; + + if (auto enclosing = context.Enclosing()) { + if (enclosing->IsLiteral()) { + return result; + } else if (enclosing->Base->Name == "ID_QUOTED") { + result.Object = ObjectMatch(context, candidates); + return result; + } } - return { - .Keywords = SiftedKeywords(candidates), - .Pragma = PragmaMatch(tokens, candidates), - .IsTypeName = IsTypeNameMatched(candidates), - .Function = FunctionMatch(tokens, candidates), - .Hint = HintMatch(candidates), - }; + result.Keywords = SiftedKeywords(candidates); + result.Pragma = PragmaMatch(context, candidates); + result.Type = TypeMatch(candidates); + result.Function = FunctionMatch(context, candidates); + result.Hint = HintMatch(candidates); + result.Object = ObjectMatch(context, candidates); + result.Cluster = ClusterMatch(context, candidates); + + return result; } private: - IC3Engine::TConfig ComputeC3Config() { + IC3Engine::TConfig ComputeC3Config() const { return { .IgnoredTokens = ComputeIgnoredTokens(), .PreferredRules = ComputePreferredRules(), }; } - std::unordered_set<TTokenId> ComputeIgnoredTokens() { - auto ignoredTokens = Grammar->GetAllTokens(); - for (auto keywordToken : Grammar->GetKeywordTokens()) { + std::unordered_set<TTokenId> ComputeIgnoredTokens() const { + auto ignoredTokens = Grammar_->GetAllTokens(); + for (auto keywordToken : Grammar_->GetKeywordTokens()) { ignoredTokens.erase(keywordToken); } - for (auto punctuationToken : Grammar->GetPunctuationTokens()) { + for (auto punctuationToken : Grammar_->GetPunctuationTokens()) { ignoredTokens.erase(punctuationToken); } return ignoredTokens; } - std::unordered_set<TRuleId> ComputePreferredRules() { + std::unordered_set<TRuleId> ComputePreferredRules() const { return GetC3PreferredRules(); } - TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) { - const auto& vocabulary = Grammar->GetVocabulary(); - const auto& keywordTokens = Grammar->GetKeywordTokens(); + TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) const { + const auto& vocabulary = Grammar_->GetVocabulary(); + const auto& keywordTokens = Grammar_->GetKeywordTokens(); TLocalSyntaxContext::TKeywords keywords; for (const auto& token : candidates.Tokens) { @@ -122,40 +135,41 @@ namespace NSQLComplete { } TMaybe<TLocalSyntaxContext::TPragma> PragmaMatch( - const TParsedTokenList& tokens, const TC3Candidates& candidates) { + const TCursorTokenContext& context, const TC3Candidates& candidates) const { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) { return Nothing(); } TLocalSyntaxContext::TPragma pragma; - if (EndsWith(tokens, {"ID_PLAIN", "DOT"})) { - pragma.Namespace = tokens[tokens.size() - 2].Content; - } else if (EndsWith(tokens, {"ID_PLAIN", "DOT", ""})) { - pragma.Namespace = tokens[tokens.size() - 3].Content; + + if (TMaybe<TRichParsedToken> begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) { + pragma.Namespace = begin->Base->Content; } return pragma; } - bool IsTypeNameMatched(const TC3Candidates& candidates) { + bool TypeMatch(const TC3Candidates& candidates) const { return AnyOf(candidates.Rules, RuleAdapted(IsLikelyTypeStack)); } TMaybe<TLocalSyntaxContext::TFunction> FunctionMatch( - const TParsedTokenList& tokens, const TC3Candidates& candidates) { + const TCursorTokenContext& context, const TC3Candidates& candidates) const { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyFunctionStack))) { return Nothing(); } TLocalSyntaxContext::TFunction function; - if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE"})) { - function.Namespace = tokens[tokens.size() - 2].Content; - } else if (EndsWith(tokens, {"ID_PLAIN", "NAMESPACE", ""})) { - function.Namespace = tokens[tokens.size() - 3].Content; + if (TMaybe<TRichParsedToken> begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "NAMESPACE", ""}))) { + function.Namespace = begin->Base->Content; } return function; } - TMaybe<TLocalSyntaxContext::THint> HintMatch(const TC3Candidates& candidates) { + TMaybe<TLocalSyntaxContext::THint> HintMatch(const TC3Candidates& candidates) const { // TODO(YQL-19747): detect local contexts with a single iteration through the candidates.Rules auto rule = FindIf(candidates.Rules, RuleAdapted(IsLikelyHintStack)); if (rule == std::end(candidates.Rules)) { @@ -172,45 +186,103 @@ namespace NSQLComplete { }; } - bool TokenizePrefix(TCompletionInput input, TParsedTokenList& tokens, TCaretTokenPosition& caret) { - NYql::TIssues issues; - if (!NSQLTranslation::Tokenize( - *Lexer_, TString(input.Text), /* queryName = */ "", - tokens, issues, /* maxErrors = */ 1)) { - return false; + TMaybe<TLocalSyntaxContext::TObject> ObjectMatch( + const TCursorTokenContext& context, const TC3Candidates& candidates) const { + TLocalSyntaxContext::TObject object; + + if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyObjectRefStack))) { + object.Kinds.emplace(EObjectKind::Folder); + } + + if (AnyOf(candidates.Rules, RuleAdapted(IsLikelyExistingTableStack))) { + object.Kinds.emplace(EObjectKind::Folder); + object.Kinds.emplace(EObjectKind::Table); + } + + if (object.Kinds.empty()) { + return Nothing(); } - Y_ENSURE(!tokens.empty() && tokens.back().Name == "EOF"); - tokens.pop_back(); + if (TMaybe<TRichParsedToken> begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) { + object.Cluster = begin->Base->Content; + } - caret = CaretTokenPosition(tokens, input.CursorPosition); - tokens.crop(caret.NextTokenIndex + 1); - return true; + if (TMaybe<TRichParsedToken> begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", "ID_PLAIN", "DOT", ""}))) { + object.Provider = begin->Base->Content; + } + + if (auto path = ObjectPath(context)) { + object.Path = *path; + object.IsEnclosed = true; + } + + return object; + } + + TMaybe<TString> ObjectPath(const TCursorTokenContext& context) const { + if (auto enclosing = context.Enclosing()) { + TString path = enclosing->Base->Content; + if (enclosing->Base->Name == "ID_QUOTED") { + path = Unquoted(std::move(path)); + } + path.resize(context.Cursor.Position - enclosing->Position - 1); + return path; + } + return Nothing(); } - bool IsCaretEnslosed(const TParsedTokenList& tokens, TCaretTokenPosition caret) { - if (tokens.empty() || caret.PrevTokenIndex != caret.NextTokenIndex) { - return false; + TMaybe<TLocalSyntaxContext::TCluster> ClusterMatch( + const TCursorTokenContext& context, const TC3Candidates& candidates) const { + if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyClusterStack))) { + return Nothing(); } - const auto& token = tokens.back(); - return token.Name == "STRING_VALUE" || - token.Name == "ID_QUOTED" || - token.Name == "DIGIGTS" || - token.Name == "INTEGER_VALUE" || - token.Name == "REAL"; + TLocalSyntaxContext::TCluster cluster; + if (TMaybe<TRichParsedToken> begin; + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON"})) || + (begin = context.MatchCursorPrefix({"ID_PLAIN", "COLON", ""}))) { + cluster.Provider = begin->Base->Content; + } + return cluster; + } + + TEditRange EditRange(const TCursorTokenContext& context) const { + if (auto enclosing = context.Enclosing()) { + return EditRange(*enclosing, context.Cursor); + } + + return { + .Begin = context.Cursor.Position, + .Length = 0, + }; + } + + TEditRange EditRange(const TRichParsedToken& token, const TCursor& cursor) const { + size_t begin = token.Position; + if (token.Base->Name == "NOT_EQUALS2") { + begin += 1; + } + + return { + .Begin = begin, + .Length = cursor.Position - begin, + }; } - const ISqlGrammar* Grammar; + const ISqlGrammar* Grammar_; NSQLTranslation::ILexer::TPtr Lexer_; - TC3Engine<G> C3; + TC3Engine<G> C3_; }; class TLocalSyntaxAnalysis: public ILocalSyntaxAnalysis { public: explicit TLocalSyntaxAnalysis(TLexerSupplier lexer) - : DefaultEngine(lexer) - , AnsiEngine(lexer) + : DefaultEngine_(lexer) + , AnsiEngine_(lexer) { } @@ -223,13 +295,13 @@ namespace NSQLComplete { private: ILocalSyntaxAnalysis& GetSpecializedEngine(bool isAnsiLexer) { if (isAnsiLexer) { - return AnsiEngine; + return AnsiEngine_; } - return DefaultEngine; + return DefaultEngine_; } - TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ false> DefaultEngine; - TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ true> AnsiEngine; + TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ false> DefaultEngine_; + TSpecializedLocalSyntaxAnalysis</* IsAnsiLexer = */ true> AnsiEngine_; }; ILocalSyntaxAnalysis::TPtr MakeLocalSyntaxAnalysis(TLexerSupplier lexer) { diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h index d58b62c62cd..8f88d5aa71c 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.h +++ b/yql/essentials/sql/v1/complete/syntax/local.h @@ -1,15 +1,22 @@ #pragma once +#include <yql/essentials/sql/v1/complete/core/name.h> #include <yql/essentials/sql/v1/complete/sql_complete.h> #include <yql/essentials/sql/v1/lexer/lexer.h> #include <util/generic/string.h> #include <util/generic/hash.h> +#include <util/generic/hash_set.h> #include <util/generic/maybe.h> namespace NSQLComplete { + struct TEditRange { + size_t Begin = 0; + size_t Length = 0; + }; + struct TLocalSyntaxContext { using TKeywords = THashMap<TString, TVector<TString>>; @@ -25,11 +32,26 @@ namespace NSQLComplete { EStatementKind StatementKind; }; + struct TCluster { + TString Provider; + }; + + struct TObject { + TString Provider; + TString Cluster; + TString Path; + THashSet<EObjectKind> Kinds; + bool IsEnclosed = false; + }; + TKeywords Keywords; TMaybe<TPragma> Pragma; - bool IsTypeName = false; + bool Type = false; TMaybe<TFunction> Function; TMaybe<THint> Hint; + TMaybe<TObject> Object; + TMaybe<TCluster> Cluster; + TEditRange EditRange; }; class ILocalSyntaxAnalysis { diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp index 938483438b1..ce6c94306d4 100644 --- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp +++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.cpp @@ -13,7 +13,7 @@ namespace NSQLComplete { - const TVector<TRuleId> KeywordRules = { + const TVector<TRuleId> PreferredRules = { RULE(Keyword), RULE(Keyword_expr_uncompat), RULE(Keyword_table_uncompat), @@ -24,27 +24,13 @@ namespace NSQLComplete { RULE(Keyword_hint_uncompat), RULE(Keyword_as_compat), RULE(Keyword_compat), - }; - - const TVector<TRuleId> PragmaNameRules = { - RULE(Opt_id_prefix_or_type), - RULE(An_id), - }; - - const TVector<TRuleId> TypeNameRules = { - RULE(Type_name_simple), RULE(An_id_or_type), - }; - - const TVector<TRuleId> FunctionNameRules = { + RULE(An_id), RULE(Id_expr), - RULE(An_id_or_type), RULE(Id_or_type), - }; - - const TVector<TRuleId> HintNameRules = { RULE(Id_hint), - RULE(An_id), + RULE(Opt_id_prefix_or_type), + RULE(Type_name_simple), }; TVector<std::string> Symbolized(const TParserCallStack& stack) { @@ -101,6 +87,26 @@ namespace NSQLComplete { Contains({RULE(External_call_param), RULE(An_id)}, stack); } + bool IsLikelyObjectRefStack(const TParserCallStack& stack) { + return Contains({RULE(Object_ref)}, stack); + } + + bool IsLikelyExistingTableStack(const TParserCallStack& stack) { + return !Contains({RULE(Create_table_stmt), + RULE(Simple_table_ref)}, stack) && + (Contains({RULE(Simple_table_ref), + RULE(Simple_table_ref_core), + RULE(Object_ref)}, stack) || + Contains({RULE(Single_source), + RULE(Table_ref), + RULE(Table_key), + RULE(Id_table_or_type)}, stack)); + } + + bool IsLikelyClusterStack(const TParserCallStack& stack) { + return Contains({RULE(Cluster_expr)}, stack); + } + TMaybe<EStatementKind> StatementKindOf(const TParserCallStack& stack) { for (TRuleId rule : std::ranges::views::reverse(stack)) { if (rule == RULE(Process_core) || rule == RULE(Reduce_core) || rule == RULE(Select_core)) { @@ -115,10 +121,7 @@ namespace NSQLComplete { std::unordered_set<TRuleId> GetC3PreferredRules() { std::unordered_set<TRuleId> preferredRules; - preferredRules.insert(std::begin(KeywordRules), std::end(KeywordRules)); - preferredRules.insert(std::begin(PragmaNameRules), std::end(PragmaNameRules)); - preferredRules.insert(std::begin(TypeNameRules), std::end(TypeNameRules)); - preferredRules.insert(std::begin(FunctionNameRules), std::end(FunctionNameRules)); + preferredRules.insert(std::begin(PreferredRules), std::end(PreferredRules)); return preferredRules; } diff --git a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h index d185b72d628..d44b824a05e 100644 --- a/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h +++ b/yql/essentials/sql/v1/complete/syntax/parser_call_stack.h @@ -15,6 +15,12 @@ namespace NSQLComplete { bool IsLikelyHintStack(const TParserCallStack& stack); + bool IsLikelyObjectRefStack(const TParserCallStack& stack); + + bool IsLikelyExistingTableStack(const TParserCallStack& stack); + + bool IsLikelyClusterStack(const TParserCallStack& stack); + TMaybe<EStatementKind> StatementKindOf(const TParserCallStack& stack); std::unordered_set<TRuleId> GetC3PreferredRules(); diff --git a/yql/essentials/sql/v1/complete/syntax/token.cpp b/yql/essentials/sql/v1/complete/syntax/token.cpp deleted file mode 100644 index b8aee3211c6..00000000000 --- a/yql/essentials/sql/v1/complete/syntax/token.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "token.h" - -#include <yql/essentials/core/issue/yql_issue.h> -#include <yql/essentials/sql/v1/lexer/lexer.h> - -namespace NSQLComplete { - - bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output) { - TVector<TString> statements; - NYql::TIssues issues; - if (!NSQLTranslationV1::SplitQueryToStatements( - TString(input.Text) + ";", lexer, - statements, issues, /* file = */ "", - /* areBlankSkipped = */ false)) { - return false; - } - - size_t cursor = 0; - for (const auto& statement : statements) { - if (input.CursorPosition < cursor + statement.size()) { - output = { - .Text = input.Text.SubStr(cursor, statement.size()), - .CursorPosition = input.CursorPosition - cursor, - }; - return true; - } - cursor += statement.size(); - } - - output = input; - return true; - } - - TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition) { - size_t cursor = 0; - for (size_t i = 0; i < tokens.size(); ++i) { - const auto& content = tokens[i].Content; - cursor += content.size(); - if (cursorPosition < cursor) { - return {i, i}; - } else if (cursorPosition == cursor && IsWordBoundary(content.back())) { - return {i, i + 1}; - } - } - return {std::max(tokens.size(), static_cast<size_t>(1)) - 1, tokens.size()}; - } - - bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern) { - if (tokens.size() < pattern.size()) { - return false; - } - for (yssize_t i = tokens.ysize() - 1, j = pattern.ysize() - 1; 0 <= j; --i, --j) { - if (!pattern[j].empty() && tokens[i].Name != pattern[j]) { - return false; - } - } - return true; - } - -} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/token.h b/yql/essentials/sql/v1/complete/syntax/token.h deleted file mode 100644 index d1e215285a9..00000000000 --- a/yql/essentials/sql/v1/complete/syntax/token.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include <yql/essentials/sql/v1/complete/core/input.h> -#include <yql/essentials/sql/v1/complete/text/word.h> - -#include <yql/essentials/parser/lexer_common/lexer.h> - -namespace NSQLComplete { - - using NSQLTranslation::TParsedTokenList; - - // `PrevTokenIndex` = `NextTokenIndex`, iff caret is enclosed - struct TCaretTokenPosition { - size_t PrevTokenIndex; - size_t NextTokenIndex; - }; - - bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output); - - TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition); - - bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern); - -} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/ut/ya.make b/yql/essentials/sql/v1/complete/syntax/ut/ya.make index e070185af9f..7e682c5bac0 100644 --- a/yql/essentials/sql/v1/complete/syntax/ut/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ut/ya.make @@ -2,6 +2,11 @@ UNITTEST_FOR(yql/essentials/sql/v1/complete/syntax) SRCS( grammar_ut.cpp + cursor_token_context_ut.cpp +) + +PEERDIR( + yql/essentials/sql/v1/lexer/antlr4_pure ) END() diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make index 9e2e908454b..7f63e5b2374 100644 --- a/yql/essentials/sql/v1/complete/syntax/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ya.make @@ -2,11 +2,11 @@ LIBRARY() SRCS( ansi.cpp + cursor_token_context.cpp format.cpp grammar.cpp local.cpp parser_call_stack.cpp - token.cpp ) ADDINCL( @@ -21,6 +21,8 @@ PEERDIR( yql/essentials/sql/settings yql/essentials/sql/v1/lexer yql/essentials/sql/v1/reflect + yql/essentials/sql/v1/complete/core + yql/essentials/sql/v1/complete/text ) END() |