diff options
author | vitya-smirnov <[email protected]> | 2025-08-12 15:04:56 +0300 |
---|---|---|
committer | vitya-smirnov <[email protected]> | 2025-08-12 15:29:12 +0300 |
commit | 088fbdd4485f80a02567bbe84ed0e3f35337eb3d (patch) | |
tree | 15177d3cbfb50769297613218fe2597c7c2f678c /yql/essentials/sql | |
parent | a84a916fe9eff84d52151ee74e4c6ddcebcf00c2 (diff) |
YQL-19747: Improve and fix completion engine
Played with the autocomplete and found some issues:
https://nda.ya.ru/t/1F9ioYe37HVwo2.
- Supported `min`, `max` parameters of the `RANGE` table
function, respecting `prefix`.
- Supported cluster detection at table function, e.g.
`SELECT * FROM plato.RANGE(Input1, Input2)`.
- Made column filter not to ignore a table alias.
- Supported keyword as identifier parsing.
- Supported `ID_QUOTED` as table alias.
commit_hash:9e9ff13373cd059e6a240ddff0ae18f599d790c0
Diffstat (limited to 'yql/essentials/sql')
14 files changed, 236 insertions, 29 deletions
diff --git a/yql/essentials/sql/v1/complete/analysis/global/evaluate.cpp b/yql/essentials/sql/v1/complete/analysis/global/evaluate.cpp index c42fef22c24..2c07f470c6a 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/evaluate.cpp +++ b/yql/essentials/sql/v1/complete/analysis/global/evaluate.cpp @@ -78,6 +78,38 @@ namespace NSQLComplete { return TPartialValue(std::move(result)); } + std::any visitKeyword_compat(SQLv1::Keyword_compatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_expr_uncompat(SQLv1::Keyword_expr_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_table_uncompat(SQLv1::Keyword_table_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_select_uncompat(SQLv1::Keyword_select_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_alter_uncompat(SQLv1::Keyword_alter_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_in_uncompat(SQLv1::Keyword_in_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_window_uncompat(SQLv1::Keyword_window_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + + std::any visitKeyword_hint_uncompat(SQLv1::Keyword_hint_uncompatContext* ctx) override { + return TPartialValue(GetText(ctx)); + } + std::any visitTerminal(antlr4::tree::TerminalNode* node) override { switch (node->getSymbol()->getType()) { case SQLv1::TOKEN_ID_QUOTED: diff --git a/yql/essentials/sql/v1/complete/analysis/global/function.cpp b/yql/essentials/sql/v1/complete/analysis/global/function.cpp index 30072f4a69c..3ad64adf683 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/function.cpp +++ b/yql/essentials/sql/v1/complete/analysis/global/function.cpp @@ -1,6 +1,8 @@ #include "function.h" +#include "evaluate.h" #include "narrowing_visitor.h" 
+#include "use.h" #include <library/cpp/iterator/enumerate.h> @@ -10,8 +12,9 @@ namespace NSQLComplete { class TVisitor: public TSQLv1NarrowingVisitor { public: - TVisitor(const TParsedInput& input) + TVisitor(const TParsedInput& input, const TNamedNodes* nodes) : TSQLv1NarrowingVisitor(input) + , Nodes_(nodes) { } @@ -32,14 +35,31 @@ namespace NSQLComplete { return {}; } + const size_t argN = ArgumentNumber(ctx).GetOrElse(0); return TFunctionContext{ .Name = function->getText(), - .ArgumentNumber = ArgumentNumber(ctx).GetOrElse(0), + .ArgumentNumber = argN, + .Arg0 = (argN != 0) ? Arg0(ctx) : Nothing(), + .Cluster = Cluster(ctx), }; } private: - TMaybe<size_t> ArgumentNumber(SQLv1::Table_refContext* ctx) { + TMaybe<TString> Arg0(SQLv1::Table_refContext* ctx) const { + auto* table_arg = ctx->table_arg(0); + if (!table_arg) { + return Nothing(); + } + + auto* named_expr = table_arg->named_expr(); + if (!named_expr) { + return Nothing(); + } + + return ToObjectRef(PartiallyEvaluate(named_expr, *Nodes_)); + } + + TMaybe<size_t> ArgumentNumber(SQLv1::Table_refContext* ctx) const { for (auto [i, arg] : Enumerate(ctx->table_arg())) { if (IsEnclosing(arg)) { return i; @@ -47,12 +67,23 @@ namespace NSQLComplete { } return Nothing(); } + + TMaybe<TClusterContext> Cluster(SQLv1::Table_refContext* ctx) const { + auto* cluster_expr = ctx->cluster_expr(); + if (!cluster_expr) { + return Nothing(); + } + + return ParseClusterContext(cluster_expr, *Nodes_); + } + + const TNamedNodes* Nodes_; }; } // namespace - TMaybe<TFunctionContext> EnclosingFunction(TParsedInput input) { - std::any result = TVisitor(input).visit(input.SqlQuery); + TMaybe<TFunctionContext> EnclosingFunction(TParsedInput input, const TNamedNodes& nodes) { + std::any result = TVisitor(input, &nodes).visit(input.SqlQuery); if (!result.has_value()) { return Nothing(); } diff --git a/yql/essentials/sql/v1/complete/analysis/global/function.h b/yql/essentials/sql/v1/complete/analysis/global/function.h index 
52aa7090b7b..536e6f012b6 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/function.h +++ b/yql/essentials/sql/v1/complete/analysis/global/function.h @@ -2,12 +2,13 @@ #include "global.h" #include "input.h" +#include "named_node.h" #include <util/generic/maybe.h> #include <util/generic/string.h> namespace NSQLComplete { - TMaybe<TFunctionContext> EnclosingFunction(TParsedInput input); + TMaybe<TFunctionContext> EnclosingFunction(TParsedInput input, const TNamedNodes& nodes); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/global.cpp b/yql/essentials/sql/v1/complete/analysis/global/global.cpp index f97c940bed4..95a260ee973 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/global.cpp +++ b/yql/essentials/sql/v1/complete/analysis/global/global.cpp @@ -171,7 +171,7 @@ namespace NSQLComplete { ctx.Use = FindUseStatement(parsed, nodes); ctx.Names = Keys(nodes); - ctx.EnclosingFunction = EnclosingFunction(parsed); + ctx.EnclosingFunction = EnclosingFunction(parsed, nodes); ctx.Column = InferColumnContext(parsed, nodes); if (ctx.Use && ctx.Column) { @@ -206,10 +206,10 @@ namespace NSQLComplete { return keys; } - void EnrichTableClusters(TColumnContext& column, const TUseContext& use) { + void EnrichTableClusters(TColumnContext& column, const TClusterContext& use) { for (auto& table : column.Tables) { if (table.Cluster.empty()) { - table.Cluster = use.Cluster; + table.Cluster = use.Name; } } } @@ -252,10 +252,20 @@ namespace NSQLComplete { } // namespace NSQLComplete template <> +void Out<NSQLComplete::TClusterContext>(IOutputStream& out, const NSQLComplete::TClusterContext& value) { + if (!value.Provider.empty()) { + out << value.Provider << ":"; + } + out << value.Name; +} + +template <> void Out<NSQLComplete::TFunctionContext>(IOutputStream& out, const NSQLComplete::TFunctionContext& value) { out << "TFunctionContext { "; out << "Name: " << value.Name; - out << ", Args: " << value.ArgumentNumber; + out << ", 
ArgN: " << value.ArgumentNumber; + out << ", Arg0: " << value.Arg0.GetOrElse("None"); + out << ", Cluster: " << value.Cluster; out << " }"; } diff --git a/yql/essentials/sql/v1/complete/analysis/global/global.h b/yql/essentials/sql/v1/complete/analysis/global/global.h index 9a18f45f7c8..62b027eaa79 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/global.h +++ b/yql/essentials/sql/v1/complete/analysis/global/global.h @@ -13,14 +13,18 @@ namespace NSQLComplete { - struct TUseContext { + struct TClusterContext { TString Provider; - TString Cluster; + TString Name; + + friend bool operator==(const TClusterContext& lhs, const TClusterContext& rhs) = default; }; struct TFunctionContext { TString Name; size_t ArgumentNumber = 0; + TMaybe<TString> Arg0 = Nothing(); + TMaybe<TClusterContext> Cluster = Nothing(); friend bool operator==(const TFunctionContext& lhs, const TFunctionContext& rhs) = default; }; @@ -42,7 +46,7 @@ namespace NSQLComplete { }; struct TGlobalContext { - TMaybe<TUseContext> Use; + TMaybe<TClusterContext> Use; TVector<TString> Names; TMaybe<TFunctionContext> EnclosingFunction; TMaybe<TColumnContext> Column; diff --git a/yql/essentials/sql/v1/complete/analysis/global/global_ut.cpp b/yql/essentials/sql/v1/complete/analysis/global/global_ut.cpp index 4f0d8c8a456..1d593044ea5 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/global_ut.cpp +++ b/yql/essentials/sql/v1/complete/analysis/global/global_ut.cpp @@ -92,7 +92,7 @@ Y_UNIT_TEST_SUITE(GlobalAnalysisTests) { { TString query = "SELECT * FROM Concat(a, #)"; TGlobalContext ctx = global->Analyze(SharpedInput(query), {}); - TFunctionContext expected = {"Concat", 1}; + TFunctionContext expected = {"Concat", 1, "a"}; UNIT_ASSERT_VALUES_EQUAL(ctx.EnclosingFunction, expected); } { @@ -112,6 +112,12 @@ Y_UNIT_TEST_SUITE(GlobalAnalysisTests) { TGlobalContext ctx = global->Analyze(SharpedInput(query), {}); UNIT_ASSERT_VALUES_EQUAL(ctx.EnclosingFunction, Nothing()); } + { + TString query = 
"SELECT * FROM plato.Concat(#)"; + TGlobalContext ctx = global->Analyze(SharpedInput(query), {}); + TClusterContext expected = {"", "plato"}; + UNIT_ASSERT_VALUES_EQUAL(ctx.EnclosingFunction->Cluster, expected); + } } Y_UNIT_TEST(SimpleSelectFrom) { diff --git a/yql/essentials/sql/v1/complete/analysis/global/use.cpp b/yql/essentials/sql/v1/complete/analysis/global/use.cpp index 4d3a98414ba..89e5ae458e6 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/use.cpp +++ b/yql/essentials/sql/v1/complete/analysis/global/use.cpp @@ -28,15 +28,35 @@ namespace NSQLComplete { return {}; } + TMaybe<TClusterContext> cluster = ParseClusterContext(expr, *Nodes_); + if (!cluster) { + return {}; + } + + return *cluster; + } + + private: + const TNamedNodes* Nodes_; + }; + + class TClusterVisitor: public TSQLv1BaseVisitor { + public: + explicit TClusterVisitor(const TNamedNodes* nodes) + : Nodes_(nodes) + { + } + + std::any visitCluster_expr(SQLv1::Cluster_exprContext* ctx) { std::string provider; std::string cluster; - if (SQLv1::An_idContext* ctx = expr->an_id()) { - provider = ctx->getText(); + if (SQLv1::An_idContext* id = ctx->an_id()) { + provider = id->getText(); } - if (SQLv1::Pure_column_or_namedContext* ctx = expr->pure_column_or_named()) { - if (auto id = GetId(ctx)) { + if (SQLv1::Pure_column_or_namedContext* named = ctx->pure_column_or_named()) { + if (auto id = GetId(named)) { cluster = std::move(*id); } } @@ -45,9 +65,9 @@ namespace NSQLComplete { return {}; } - return TUseContext{ + return TClusterContext{ .Provider = std::move(provider), - .Cluster = std::move(cluster), + .Name = std::move(cluster), }; } @@ -75,13 +95,21 @@ namespace NSQLComplete { } // namespace + TMaybe<TClusterContext> ParseClusterContext(SQLv1::Cluster_exprContext* ctx, const TNamedNodes& nodes) { + std::any result = TClusterVisitor(&nodes).visit(ctx); + if (!result.has_value()) { + return Nothing(); + } + return std::any_cast<TClusterContext>(result); + } + // TODO(YQL-19747): Use any 
to maybe conversion function - TMaybe<TUseContext> FindUseStatement(TParsedInput input, const TNamedNodes& nodes) { + TMaybe<TClusterContext> FindUseStatement(TParsedInput input, const TNamedNodes& nodes) { std::any result = TVisitor(input, &nodes).visit(input.SqlQuery); if (!result.has_value()) { return Nothing(); } - return std::any_cast<TUseContext>(result); + return std::any_cast<TClusterContext>(result); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/global/use.h b/yql/essentials/sql/v1/complete/analysis/global/use.h index 411e0f9fcd8..2ecda65daaa 100644 --- a/yql/essentials/sql/v1/complete/analysis/global/use.h +++ b/yql/essentials/sql/v1/complete/analysis/global/use.h @@ -10,6 +10,8 @@ namespace NSQLComplete { - TMaybe<TUseContext> FindUseStatement(TParsedInput input, const TNamedNodes& nodes); + TMaybe<TClusterContext> ParseClusterContext(SQLv1::Cluster_exprContext* ctx, const TNamedNodes& nodes); + + TMaybe<TClusterContext> FindUseStatement(TParsedInput input, const TNamedNodes& nodes); } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/analysis/local/local.cpp b/yql/essentials/sql/v1/complete/analysis/local/local.cpp index 1d0dd20a282..cf10d3a5e85 100644 --- a/yql/essentials/sql/v1/complete/analysis/local/local.cpp +++ b/yql/essentials/sql/v1/complete/analysis/local/local.cpp @@ -347,6 +347,10 @@ namespace NSQLComplete { (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT"})) || (begin = context.MatchCursorPrefix({"ID_PLAIN", "DOT", ""}))) { column.Table = begin->Base->Content; + } else if (TMaybe<TRichParsedToken> begin; + (begin = context.MatchCursorPrefix({"ID_QUOTED", "DOT"})) || + (begin = context.MatchCursorPrefix({"ID_QUOTED", "DOT", ""}))) { + column.Table = Unquoted(begin->Base->Content); } return column; } diff --git a/yql/essentials/sql/v1/complete/name/service/column/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/column/name_service.cpp index ebc63d9d780..bb302fac672 
100644 --- a/yql/essentials/sql/v1/complete/name/service/column/name_service.cpp +++ b/yql/essentials/sql/v1/complete/name/service/column/name_service.cpp @@ -39,10 +39,15 @@ namespace NSQLComplete { without.insert(begin(it->second), end(it->second)); } + TString columnPrefix = request.Prefix; + if (tableName.StartsWith(request.Prefix)) { + columnPrefix = ""; + } + TDescribeTableRequest describeRequest = { .TableCluster = "", .TablePath = Escaped(tableName), - .ColumnPrefix = request.Prefix, + .ColumnPrefix = columnPrefix, .ColumnsLimit = request.Limit, }; diff --git a/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp index 8b663a44acc..2691ca0fadc 100644 --- a/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp +++ b/yql/essentials/sql/v1/complete/name/service/schema/name_service.cpp @@ -45,11 +45,19 @@ namespace NSQLComplete { } THashMap<TTableId, NThreading::TFuture<TDescribeTableResponse>> futuresByTable; - for (const auto& [table, _] : aliasesByTable) { + for (const auto& [table, aliases] : aliasesByTable) { + TString columnPrefix = prefix; + for (const auto& alias : aliases) { + if (alias.StartsWith(prefix)) { + columnPrefix = ""; + break; + } + } + TDescribeTableRequest request = { .TableCluster = table.Cluster, .TablePath = table.Path, - .ColumnPrefix = prefix, + .ColumnPrefix = columnPrefix, .ColumnsLimit = limit, }; diff --git a/yql/essentials/sql/v1/complete/name_mapping.cpp b/yql/essentials/sql/v1/complete/name_mapping.cpp index 29fda533c92..a4707942fa3 100644 --- a/yql/essentials/sql/v1/complete/name_mapping.cpp +++ b/yql/essentials/sql/v1/complete/name_mapping.cpp @@ -121,7 +121,7 @@ namespace NSQLComplete { if (local.Column->Table.empty() && !name.TableAlias.empty()) { name.Identifier.prepend('.'); - name.Identifier.prepend(name.TableAlias); + name.Identifier.prepend(ToIdentifier(std::move(name.TableAlias), local)); } return 
{ECandidateKind::ColumnName, std::move(name.Identifier)}; diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index c0916674eee..f98f8309f77 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -146,7 +146,7 @@ namespace NSQLComplete { if (local.Object && global.Use) { request.Constraints.Object->Provider = global.Use->Provider; - request.Constraints.Object->Cluster = global.Use->Cluster; + request.Constraints.Object->Cluster = global.Use->Name; } if (local.Object && local.Object->HasCluster()) { @@ -198,6 +198,11 @@ namespace NSQLComplete { return local; } + if (TMaybe<TClusterContext> cluster = function->Cluster) { + object->Provider = cluster->Provider; + object->Cluster = cluster->Name; + } + auto& name = function->Name; size_t number = function->ArgumentNumber; @@ -211,6 +216,14 @@ namespace NSQLComplete { name == "regexp" || name == "filter" || name == "folder" || name == "walkfolders")) { object->Kinds.emplace(EObjectKind::Folder); + } else if ((number == 1 || number == 2) && (name == "range")) { + if (TMaybe<TString> path = function->Arg0) { + object->Path = *path; + object->Path.append("/"); + } + + object->Kinds.emplace(EObjectKind::Folder); + object->Kinds.emplace(EObjectKind::Table); } return local; diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index ddbcf9fc4b6..9540b012249 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -1151,15 +1151,33 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { { UNIT_ASSERT_VALUES_EQUAL( CompleteTop(1, engine, "SELECT * FROM Range(#)").at(0).Kind, FolderName); - UNIT_ASSERT_VALUES_UNEQUAL( + UNIT_ASSERT_VALUES_EQUAL( CompleteTop(1, engine, "SELECT * FROM Range(``, #)").at(0).Kind, FolderName); } { + TVector<TCandidate> expected = { + {TableName, "example"}, + }; + 
UNIT_ASSERT_VALUES_EQUAL( + CompleteTop(10, engine, "SELECT * FROM Range(`test/service`, `#`)"), expected); + UNIT_ASSERT_VALUES_EQUAL( + CompleteTop(10, engine, "SELECT * FROM Range(`test/service`, ``, `#`)"), expected); + } + { UNIT_ASSERT_VALUES_UNEQUAL(CompleteTop(1, engine, "SELECT Max(#)").at(0).Kind, FolderName); UNIT_ASSERT_VALUES_UNEQUAL(CompleteTop(1, engine, "SELECT Concat(#)").at(0).Kind, FolderName); } } + Y_UNIT_TEST(TableFunctionCluster) { + auto engine = MakeSqlCompletionEngineUT(); + TVector<TCandidate> expected = { + {TableName, "`people`"}, + {FolderName, "`yql/`", 1}, + }; + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(2, engine, "SELECT * FROM example.Concat(#)"), expected); + } + Y_UNIT_TEST(ColumnsAtSimpleSelect) { auto engine = MakeSqlCompletionEngineUT(); { @@ -1433,6 +1451,51 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { } } + Y_UNIT_TEST(ColumnFiltration) { + auto engine = MakeSqlCompletionEngineUT(); + + TString query = R"sql( + SELECT roo# + FROM example.`/people` AS roommate + JOIN example.`/yql/tutorial` AS query ON 1 = 1; + )sql"; + + TVector<TCandidate> expected = { + {ColumnName, "roommate.Age"}, + {ColumnName, "roommate.Name"}, + {ColumnName, "query.room"}, + }; + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(4, engine, query), expected); + } + + Y_UNIT_TEST(ColumnFromQuotedAlias) { + auto engine = MakeSqlCompletionEngineUT(); + { + TString query; + + TVector<TCandidate> expected = { + {ColumnName, "`per son`.Age"}, + {ColumnName, "`per son`.Name"}, + }; + + query = R"sql(SELECT # FROM example.`/people` AS `per son`)sql"; + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(2, engine, query), expected); + + query = R"sql(SELECT per# FROM example.`/people` AS `per son`)sql"; + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(2, engine, query), expected); + } + { + TString query = R"sql(SELECT `per son`.# FROM example.`/people` AS `per son`)sql"; + + TVector<TCandidate> expected = { + {ColumnName, "Age"}, + {ColumnName, "Name"}, + }; + + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(2, 
engine, query), expected); + } + } + Y_UNIT_TEST(ProjectionVisibility) { auto engine = MakeSqlCompletionEngineUT(); { |