diff options
author | vityaman <[email protected]> | 2025-05-20 12:50:28 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-05-20 13:07:06 +0300 |
commit | 16630d4dfad772e0108e694f76f922e486cd6439 (patch) | |
tree | 9502ff9057dfb71a94154f66a3e6452bf684c8fd /yql/essentials/sql/v1 | |
parent | 2cca5c2b53580983c51f00239859e45d4bc32836 (diff) |
YQL-19747: Support USE statement
Introduce a global analysis stage, which will later be extended with further parse-tree analyses.
---
- Related to `YQL-19747`
- Related to https://github.com/vityaman/ydb/issues/40
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1281
commit_hash:848fb3fdd6bc9612769c47d66198fca018ff465f
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/global.cpp | 94 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/global.h | 30 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/parse_tree.h | 18 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/use.cpp | 96 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/use.h | 17 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/global/ya.make | 14 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/ya.make | 7 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3t.h | 9 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/pipeline.h | 13 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.cpp | 29 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete_ut.cpp | 63 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/local.h | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/ya.make | 3 |
13 files changed, 382 insertions, 15 deletions
diff --git a/yql/essentials/sql/v1/complete/analysis/global/global.cpp b/yql/essentials/sql/v1/complete/analysis/global/global.cpp new file mode 100644 index 00000000000..a216b03de1a --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/global/global.cpp @@ -0,0 +1,94 @@ +#include "global.h" + +#include "parse_tree.h" +#include "use.h" + +#include <yql/essentials/sql/v1/complete/antlr4/pipeline.h> +#include <yql/essentials/sql/v1/complete/syntax/ansi.h> + +namespace NSQLComplete { + + class TErrorStrategy: public antlr4::DefaultErrorStrategy { + public: + antlr4::Token* singleTokenDeletion(antlr4::Parser* /* recognizer */) override { + return nullptr; + } + }; + + template <bool IsAnsiLexer> + class TSpecializedGlobalAnalysis: public IGlobalAnalysis { + public: + using TDefaultYQLGrammar = TAntlrGrammar< + NALADefaultAntlr4::SQLv1Antlr4Lexer, + NALADefaultAntlr4::SQLv1Antlr4Parser>; + + using TAnsiYQLGrammar = TAntlrGrammar< + NALAAnsiAntlr4::SQLv1Antlr4Lexer, + NALAAnsiAntlr4::SQLv1Antlr4Parser>; + + using G = std::conditional_t< + IsAnsiLexer, + TAnsiYQLGrammar, + TDefaultYQLGrammar>; + + TSpecializedGlobalAnalysis() + : Chars_() + , Lexer_(&Chars_) + , Tokens_(&Lexer_) + , Parser_(&Tokens_) + { + Lexer_.removeErrorListeners(); + Parser_.removeErrorListeners(); + Parser_.setErrorHandler(std::make_shared<TErrorStrategy>()); + } + + TGlobalContext Analyze(TCompletionInput input) override { + SQLv1::Sql_queryContext* sqlQuery = Parse(input.Text); + Y_ENSURE(sqlQuery); + + TGlobalContext ctx; + + ctx.Use = FindUseStatement(sqlQuery, &Tokens_, input.CursorPosition); + + return ctx; + } + + private: + SQLv1::Sql_queryContext* Parse(TStringBuf input) { + Chars_.load(input.Data(), input.Size(), /* lenient = */ false); + Lexer_.reset(); + Tokens_.setTokenSource(&Lexer_); + Parser_.reset(); + return Parser_.sql_query(); + } + + antlr4::ANTLRInputStream Chars_; + G::TLexer Lexer_; + antlr4::CommonTokenStream Tokens_; + TDefaultYQLGrammar::TParser Parser_; + }; + + 
class TGlobalAnalysis: public IGlobalAnalysis { + public: + TGlobalContext Analyze(TCompletionInput input) override { + const bool isAnsiLexer = IsAnsiQuery(TString(input.Text)); + return GetSpecialized(isAnsiLexer).Analyze(std::move(input)); + } + + private: + IGlobalAnalysis& GetSpecialized(bool isAnsiLexer) { + if (isAnsiLexer) { + return AnsiAnalysis_; + } + return DefaultAnalysis_; + } + + TSpecializedGlobalAnalysis</* IsAnsiLexer = */ false> DefaultAnalysis_; + TSpecializedGlobalAnalysis</* IsAnsiLexer = */ true> AnsiAnalysis_; + }; + + IGlobalAnalysis::TPtr MakeGlobalAnalysis() { + return MakeHolder<TGlobalAnalysis>(); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/global.h b/yql/essentials/sql/v1/complete/analysis/global/global.h new file mode 100644 index 00000000000..a5249bc3a48 --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/global/global.h @@ -0,0 +1,30 @@ +#pragma once + +#include <yql/essentials/sql/v1/complete/core/input.h> + +#include <util/generic/ptr.h> +#include <util/generic/maybe.h> +#include <util/generic/string.h> + +namespace NSQLComplete { + + struct TUseContext { + TString Provider; + TString Cluster; + }; + + struct TGlobalContext { + TMaybe<TUseContext> Use; + }; + + class IGlobalAnalysis { + public: + using TPtr = THolder<IGlobalAnalysis>; + + virtual ~IGlobalAnalysis() = default; + virtual TGlobalContext Analyze(TCompletionInput input) = 0; + }; + + IGlobalAnalysis::TPtr MakeGlobalAnalysis(); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/parse_tree.h b/yql/essentials/sql/v1/complete/analysis/global/parse_tree.h new file mode 100644 index 00000000000..25ca6d336cb --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/global/parse_tree.h @@ -0,0 +1,18 @@ +#pragma once + +#ifdef TOKEN_QUERY // Conflict with the winnt.h + #undef TOKEN_QUERY +#endif +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> 
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4BaseVisitor.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h> + +namespace NSQLComplete { + + using SQLv1 = NALADefaultAntlr4::SQLv1Antlr4Parser; + + using NALADefaultAntlr4::SQLv1Antlr4BaseVisitor; + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/use.cpp b/yql/essentials/sql/v1/complete/analysis/global/use.cpp new file mode 100644 index 00000000000..da7dc6a5751 --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/global/use.cpp @@ -0,0 +1,96 @@ +#include "use.h" + +namespace NSQLComplete { + + namespace { + + class TVisitor: public SQLv1Antlr4BaseVisitor { + public: + TVisitor(antlr4::TokenStream* tokens, size_t cursorPosition) + : Tokens_(tokens) + , CursorPosition_(cursorPosition) + { + } + + std::any visitSql_stmt_core(SQLv1::Sql_stmt_coreContext* ctx) override { + if (ctx->use_stmt() || IsEnclosing(ctx)) { + return visitChildren(ctx); + } + return {}; + } + + std::any visitUse_stmt(SQLv1::Use_stmtContext* ctx) override { + SQLv1::Cluster_exprContext* expr = ctx->cluster_expr(); + if (!expr) { + return {}; + } + + std::string provider; + std::string cluster; + + if (SQLv1::An_idContext* ctx = expr->an_id()) { + provider = ctx->getText(); + } + + if (SQLv1::Pure_column_or_namedContext* ctx = expr->pure_column_or_named()) { + cluster = ctx->getText(); + } + + if (cluster.empty()) { + return {}; + } + + return TUseContext{ + .Provider = std::move(provider), + .Cluster = std::move(cluster), + }; + } + + std::any aggregateResult(std::any aggregate, std::any nextResult) override { + if (nextResult.has_value()) { + return nextResult; + } + return aggregate; + } + + bool shouldVisitNextChild(antlr4::tree::ParseTree* node, const std::any& /*currentResult*/) 
override { + return TextInterval(node).a < static_cast<ssize_t>(CursorPosition_); + } + + private: + bool IsEnclosing(antlr4::tree::ParseTree* tree) const { + return TextInterval(tree).properlyContains(CursorInterval()); + } + + antlr4::misc::Interval TextInterval(antlr4::tree::ParseTree* tree) const { + auto tokens = tree->getSourceInterval(); + if (tokens.b == -1) { + tokens.b = tokens.a; + } + return antlr4::misc::Interval( + Tokens_->get(tokens.a)->getStartIndex(), + Tokens_->get(tokens.b)->getStopIndex()); + } + + antlr4::misc::Interval CursorInterval() const { + return antlr4::misc::Interval(CursorPosition_, CursorPosition_); + } + + antlr4::TokenStream* Tokens_; + size_t CursorPosition_; + }; + + } // namespace + + TMaybe<TUseContext> FindUseStatement( + SQLv1::Sql_queryContext* ctx, + antlr4::TokenStream* tokens, + size_t cursorPosition) { + std::any result = TVisitor(tokens, cursorPosition).visit(ctx); + if (!result.has_value()) { + return Nothing(); + } + return std::any_cast<TUseContext>(result); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/use.h b/yql/essentials/sql/v1/complete/analysis/global/use.h new file mode 100644 index 00000000000..54f3557fd62 --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/global/use.h @@ -0,0 +1,17 @@ +#pragma once + +#include "global.h" +#include "parse_tree.h" + +#include <util/generic/ptr.h> +#include <util/generic/maybe.h> +#include <util/generic/string.h> + +namespace NSQLComplete { + + TMaybe<TUseContext> FindUseStatement( + SQLv1::Sql_queryContext* ctx, + antlr4::TokenStream* tokens, + size_t cursorPosition); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/ya.make b/yql/essentials/sql/v1/complete/analysis/global/ya.make new file mode 100644 index 00000000000..a28d99f94c2 --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/global/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + global.cpp + use.cpp +) + +PEERDIR( 
+ yql/essentials/sql/v1/complete/core + yql/essentials/parser/antlr_ast/gen/v1_antlr4 + yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4 +) + +END() diff --git a/yql/essentials/sql/v1/complete/analysis/ya.make b/yql/essentials/sql/v1/complete/analysis/ya.make new file mode 100644 index 00000000000..986754c8b34 --- /dev/null +++ b/yql/essentials/sql/v1/complete/analysis/ya.make @@ -0,0 +1,7 @@ +LIBRARY() + +END() + +RECURSE( + global +) diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h index fb26c8c7f4a..1f71553e04f 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3t.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h @@ -1,6 +1,7 @@ #pragma once #include "c3i.h" +#include "pipeline.h" #include <yql/essentials/sql/v1/complete/text/word.h> @@ -15,14 +16,6 @@ namespace NSQLComplete { - template <class Lexer, class Parser> - struct TAntlrGrammar { - using TLexer = Lexer; - using TParser = Parser; - - TAntlrGrammar() = delete; - }; - template <class G> class TC3Engine: public IC3Engine { public: diff --git a/yql/essentials/sql/v1/complete/antlr4/pipeline.h b/yql/essentials/sql/v1/complete/antlr4/pipeline.h new file mode 100644 index 00000000000..75fd2e5ba2c --- /dev/null +++ b/yql/essentials/sql/v1/complete/antlr4/pipeline.h @@ -0,0 +1,13 @@ +#pragma once + +namespace NSQLComplete { + + template <class Lexer, class Parser> + struct TAntlrGrammar { + using TLexer = Lexer; + using TParser = Parser; + + TAntlrGrammar() = delete; + }; + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index d22e06e2b94..00d346f0770 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -4,6 +4,7 @@ #include <yql/essentials/sql/v1/complete/name/service/static/name_service.h> #include <yql/essentials/sql/v1/complete/syntax/local.h> #include 
<yql/essentials/sql/v1/complete/syntax/format.h> +#include <yql/essentials/sql/v1/complete/analysis/global/global.h> #include <util/generic/algorithm.h> #include <util/charset/utf8.h> @@ -18,6 +19,7 @@ namespace NSQLComplete { ISqlCompletionEngine::TConfiguration configuration) : Configuration_(std::move(configuration)) , SyntaxAnalysis_(MakeLocalSyntaxAnalysis(lexer)) + , GlobalAnalysis_(MakeGlobalAnalysis()) , Names_(std::move(names)) { } @@ -39,7 +41,9 @@ namespace NSQLComplete { TLocalSyntaxContext context = SyntaxAnalysis_->Analyze(input); auto keywords = context.Keywords; - TNameRequest request = NameRequestFrom(input, context); + TGlobalContext global = GlobalAnalysis_->Analyze(input); + + TNameRequest request = NameRequestFrom(input, context, global); if (request.IsEmpty()) { return NThreading::MakeFuture<TCompletion>({ .CompletedToken = GetCompletedToken(input, context.EditRange), @@ -61,7 +65,10 @@ namespace NSQLComplete { }; } - TNameRequest NameRequestFrom(TCompletionInput input, const TLocalSyntaxContext& context) const { + TNameRequest NameRequestFrom( + TCompletionInput input, + const TLocalSyntaxContext& context, + const TGlobalContext& global) const { TNameRequest request = { .Prefix = TString(GetCompletedToken(input, context.EditRange).Content), .Limit = Configuration_.Limit, @@ -94,14 +101,21 @@ namespace NSQLComplete { } if (context.Object) { - request.Constraints.Object = TObjectNameConstraints{ - .Provider = context.Object->Provider, - .Cluster = context.Object->Cluster, - .Kinds = context.Object->Kinds, - }; + request.Constraints.Object = TObjectNameConstraints(); + request.Constraints.Object->Kinds = context.Object->Kinds; request.Prefix = context.Object->Path; } + if (context.Object && global.Use) { + request.Constraints.Object->Provider = global.Use->Provider; + request.Constraints.Object->Cluster = global.Use->Cluster; + } + + if (context.Object && context.Object->HasCluster()) { + request.Constraints.Object->Provider = 
context.Object->Provider; + request.Constraints.Object->Cluster = context.Object->Cluster; + } + if (context.Cluster) { TClusterName::TConstraints constraints; constraints.Namespace = context.Cluster->Provider; @@ -195,6 +209,7 @@ namespace NSQLComplete { TConfiguration Configuration_; ILocalSyntaxAnalysis::TPtr SyntaxAnalysis_; + IGlobalAnalysis::TPtr GlobalAnalysis_; INameService::TPtr Names_; }; diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index 1e01f899e46..00c5f3427f6 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -604,6 +604,69 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { } } + Y_UNIT_TEST(SelectFromWithUse) { + auto engine = MakeSqlCompletionEngineUT(); + { + TVector<TCandidate> expected = { + {TableName, "`maxim`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "ANY"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "USE yt:saurus; SELECT * FROM "), expected); + } + { + TVector<TCandidate> expected = { + {TableName, "`people`"}, + {FolderName, "`yql/`"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "USE yt:saurus; SELECT * FROM example."), expected); + } + { + TVector<TCandidate> expected = { + {TableName, "`maxim`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "ANY"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "USE example; USE yt:saurus; SELECT * FROM "), expected); + } + { + TVector<TCandidate> expected = { + {TableName, "`maxim`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "ANY"}, + }; + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, R"( + USE example; + DEFINE ACTION $hello() AS + USE yt:saurus; + SELECT * FROM #; + END DEFINE; + )"), expected); + } + { + TVector<TCandidate> expected = { + {TableName, "`people`"}, + {FolderName, "`yql/`"}, + {ClusterName, "example"}, + {ClusterName, "yt:saurus"}, + {Keyword, "ANY"}, + }; + 
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, R"( + USE example; + + DEFINE ACTION $action() AS + USE yt:saurus; + SELECT * FROM test; + END DEFINE; + + SELECT * FROM # + )"), expected); + } + } + Y_UNIT_TEST(SelectWhere) { TVector<TCandidate> expected = { {Keyword, "BITCAST("}, diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h index 9675eb9e843..635485d2b7c 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.h +++ b/yql/essentials/sql/v1/complete/syntax/local.h @@ -42,6 +42,10 @@ namespace NSQLComplete { TString Path; THashSet<EObjectKind> Kinds; bool IsQuoted = false; + + bool HasCluster() const { + return !Cluster.empty(); + } }; TKeywords Keywords; diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make index f38e4bba319..e2a7d7db5f9 100644 --- a/yql/essentials/sql/v1/complete/ya.make +++ b/yql/essentials/sql/v1/complete/ya.make @@ -8,7 +8,9 @@ PEERDIR( yql/essentials/sql/v1/lexer yql/essentials/sql/v1/complete/antlr4 yql/essentials/sql/v1/complete/name/service + # TODO(YQL-19747): move to /analysis/local yql/essentials/sql/v1/complete/syntax + yql/essentials/sql/v1/complete/analysis/global yql/essentials/sql/v1/complete/text # TODO(YQL-19747): add it to YDB CLI PEERDIR yql/essentials/sql/v1/complete/name/service/static @@ -17,6 +19,7 @@ PEERDIR( END() RECURSE( + analysis antlr4 core name |