diff options
author | robot-piglet <[email protected]> | 2025-06-09 20:13:36 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-06-09 20:25:09 +0300 |
commit | bcdf39ca01cce750f1cd7221198eaa0342731b21 (patch) | |
tree | 79e517b6076141b9df5167589696a98c5ddb8d20 /yql/essentials/sql | |
parent | fd23a3e3bd20bdb790e18c9a5b353fb0907322a8 (diff) |
Intermediate changes
commit_hash:a5cf347a0fdbf87672444c86b9acad24d4ab956a
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/ya.make | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/cluster.cpp | 30 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/cluster.h | 15 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/table.cpp | 60 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/table.h | 16 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/ut/ya.make | 20 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/ya.make | 21 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/yql.cpp | 90 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/yql.h | 28 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp | 74 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/check/check_complete.cpp | 109 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/check/check_complete.h | 21 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/check/ya.make | 21 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/ya.make | 1 |
14 files changed, 507 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/complete/analysis/ya.make b/yql/essentials/sql/v1/complete/analysis/ya.make
index b6cf93d3ebc..3393a7bf96a 100644
--- a/yql/essentials/sql/v1/complete/analysis/ya.make
+++ b/yql/essentials/sql/v1/complete/analysis/ya.make
@@ -5,4 +5,5 @@ END()
 RECURSE(
     global
     local
+    yql
 )
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/cluster.cpp b/yql/essentials/sql/v1/complete/analysis/yql/cluster.cpp
new file mode 100644
index 00000000000..0f01463ae0b
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/cluster.cpp
@@ -0,0 +1,30 @@
+#include "cluster.h"
+
+#define USE_CURRENT_UDF_ABI_VERSION
+#include <yql/essentials/core/yql_expr_optimize.h>
+
+namespace NSQLComplete {
+    // Maps a `DataSource`/`DataSink` callable to the content of its second child; yields Nothing() for any other node.
+    TMaybe<TString> ToCluster(const NYql::TExprNode& node) {
+        if (!node.IsCallable("DataSource") && !node.IsCallable("DataSink")) {
+            return Nothing();
+        }
+        if (node.ChildrenSize() == 2 && node.Child(1)->IsAtom()) { // only the exact two-child form with an atom in slot 1 is accepted
+            return TString(node.Child(1)->Content());
+        }
+        return Nothing();
+    }
+    // Applies ToCluster() to the nodes that NYql::VisitExpr reports for `root` and returns the distinct results.
+    THashSet<TString> CollectClusters(const NYql::TExprNode& root) {
+        THashSet<TString> clusters;
+        NYql::VisitExpr(root, [&](const NYql::TExprNode& node) -> bool {
+            if (TMaybe<TString> cluster = ToCluster(node)) {
+                clusters.emplace(std::move(*cluster));
+                return true; // NOTE(review): same value as the fall-through below, so this early return is redundant
+            }
+            return true; // presumably tells VisitExpr to keep descending - confirm against its contract
+        });
+        return clusters;
+    }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/cluster.h b/yql/essentials/sql/v1/complete/analysis/yql/cluster.h
new file mode 100644
index 00000000000..07f07bf595e
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/cluster.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_expr.h>
+
+#include <util/generic/string.h>
+#include <util/generic/maybe.h>
+#include <util/generic/hash_set.h>
+
+namespace NSQLComplete {
+    // Cluster named by a single `DataSource`/`DataSink` node, or Nothing() when `node` is not one.
+    TMaybe<TString> ToCluster(const NYql::TExprNode& node);
+    // Distinct clusters found by applying ToCluster() across the expression rooted at `root`.
+    THashSet<TString> CollectClusters(const NYql::TExprNode& root);
+
+} // namespace NSQLComplete
diff --git
a/yql/essentials/sql/v1/complete/analysis/yql/table.cpp b/yql/essentials/sql/v1/complete/analysis/yql/table.cpp
new file mode 100644
index 00000000000..d14b59e4a61
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/table.cpp
@@ -0,0 +1,60 @@
+#include "table.h"
+
+#include "cluster.h"
+
+#define USE_CURRENT_UDF_ABI_VERSION
+#include <yql/essentials/core/yql_expr_optimize.h>
+
+namespace NSQLComplete {
+    // Extracts the path from a `Key` callable whose first child is a list `(table|tablescheme, String(path))`; Nothing() for any other shape.
+    TMaybe<TString> ToTablePath(const NYql::TExprNode& node) {
+        if (node.IsCallable("MrTableConcat")) { // unwrap a concat wrapper that carries at most one child
+            Y_ENSURE(node.ChildrenSize() < 2);
+            return node.ChildrenSize() == 1 ? ToTablePath(*node.Child(0)) : TMaybe<TString>(); // zero children: no path, and Child(0) must not be touched
+        }
+
+        if (!node.IsCallable("Key") || node.ChildrenSize() < 1) {
+            return Nothing();
+        }
+
+        const NYql::TExprNode* table = node.Child(0);
+        if (!table->IsList() || table->ChildrenSize() < 2) {
+            return Nothing();
+        }
+
+        TStringBuf kind = table->Child(0)->Content(); // first list element tags the entry kind
+        if (kind != "table" && kind != "tablescheme") {
+            return Nothing();
+        }
+
+        const NYql::TExprNode* string = table->Child(1);
+        if (!string->IsCallable("String") || string->ChildrenSize() < 1) {
+            return Nothing();
+        }
+
+        return TString(string->Child(0)->Content());
+    }
+    // Groups table paths by cluster for every `Read!`/`Write!` callable reported by NYql::VisitExpr for `node`.
+    THashMap<TString, THashSet<TString>> CollectTablesByCluster(const NYql::TExprNode& node) {
+        THashMap<TString, THashSet<TString>> tablesByCluster;
+        NYql::VisitExpr(node, [&](const NYql::TExprNode& current) -> bool { // named `current` so it does not shadow the outer `node`
+            if (!current.IsCallable("Read!") && !current.IsCallable("Write!")) {
+                return true;
+            }
+            if (current.ChildrenSize() < 4) { // only children 1 and 2 are read below; the stricter bound is kept as written
+                return true;
+            }
+
+            TString cluster = ToCluster(*current.Child(1)).GetOrElse(""); // unresolved cluster is filed under the empty key
+            TMaybe<TString> table = ToTablePath(*current.Child(2));
+            if (table.Empty()) {
+                return true;
+            }
+
+            tablesByCluster[std::move(cluster)].emplace(std::move(*table));
+            return true;
+        });
+        return tablesByCluster;
+    }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/table.h b/yql/essentials/sql/v1/complete/analysis/yql/table.h
new file mode 100644
index 00000000000..d9009026a42
--- /dev/null
+++
b/yql/essentials/sql/v1/complete/analysis/yql/table.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_expr.h>
+
+#include <util/generic/string.h>
+#include <util/generic/maybe.h>
+#include <util/generic/hash.h>
+#include <util/generic/hash_set.h>
+
+namespace NSQLComplete {
+    // Table path carried by a single table-key node, or Nothing() when `node` does not have the expected shape.
+    TMaybe<TString> ToTablePath(const NYql::TExprNode& node);
+    // Map from cluster to the set of table paths referenced in the expression rooted at `node`.
+    THashMap<TString, THashSet<TString>> CollectTablesByCluster(const NYql::TExprNode& node);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/ut/ya.make b/yql/essentials/sql/v1/complete/analysis/yql/ut/ya.make
new file mode 100644
index 00000000000..3d9249e0c8e
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/ut/ya.make
@@ -0,0 +1,20 @@
+UNITTEST_FOR(yql/essentials/sql/v1/complete/analysis/yql)
+
+SRCS(
+    yql_ut.cpp
+)
+
+PEERDIR(
+    yql/essentials/minikql/invoke_builtins/no_llvm
+    yql/essentials/providers/common/provider
+    yql/essentials/parser/pg_wrapper
+    yql/essentials/public/udf/service/stub
+    yql/essentials/sql/settings
+    yql/essentials/sql/v1
+    yql/essentials/sql/v1/lexer/antlr4
+    yql/essentials/sql/v1/lexer/antlr4_ansi
+    yql/essentials/sql/v1/proto_parser/antlr4
+    yql/essentials/sql/v1/proto_parser/antlr4_ansi
+)
+
+END()
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/ya.make b/yql/essentials/sql/v1/complete/analysis/yql/ya.make
new file mode 100644
index 00000000000..bfc97e7d04b
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+SRCS(
+    cluster.cpp
+    table.cpp
+    yql.cpp
+)
+
+PEERDIR(
+    yql/essentials/ast
+    yql/essentials/core
+    yql/essentials/core/services
+    yql/essentials/minikql
+    yql/essentials/minikql/invoke_builtins
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+    ut
+)
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/yql.cpp b/yql/essentials/sql/v1/complete/analysis/yql/yql.cpp
new file mode 100644
index 00000000000..8a8746e6839
--- /dev/null
+++
b/yql/essentials/sql/v1/complete/analysis/yql/yql.cpp
@@ -0,0 +1,90 @@
+#include "yql.h"
+
+#include "cluster.h"
+#include "table.h"
+
+#define USE_CURRENT_UDF_ABI_VERSION
+#include <yql/essentials/core/yql_type_annotation.h>
+#include <yql/essentials/core/services/yql_eval_expr.h>
+#include <yql/essentials/minikql/mkql_function_registry.h>
+#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
+
+#include <library/cpp/iterator/iterate_keys.h>
+
+namespace NSQLComplete {
+    // The known clusters are exactly the keys of TablesByCluster.
+    THashSet<TString> TYqlContext::Clusters() const {
+        auto keys = IterateKeys(TablesByCluster);
+        return {keys.begin(), keys.end()};
+    }
+    // AST entry point: compiles `root` and delegates to the expression overload; on compile failure copies the issues out and returns Nothing().
+    TMaybe<TYqlContext> IYqlAnalysis::Analyze(NYql::TAstNode& root, NYql::TIssues& issues) const {
+        NYql::TExprContext ctx;
+        NYql::TExprNode::TPtr expr;
+        if (!NYql::CompileExpr(root, expr, ctx, /* resolver = */ nullptr, /* urlListerManager = */ nullptr)) {
+            for (NYql::TIssue issue : ctx.IssueManager.GetIssues()) {
+                issues.AddIssue(std::move(issue));
+            }
+            return Nothing();
+        }
+        return Analyze(expr, ctx);
+    }
+
+    namespace {
+        // Default IYqlAnalysis: evaluates the expression, then collects the clusters and tables it references.
+        class TYqlAnalysis: public IYqlAnalysis {
+        public:
+            TYqlAnalysis()
+                : FunctionRegistry_(
+                      NKikimr::NMiniKQL::CreateFunctionRegistry(
+                          NKikimr::NMiniKQL::CreateBuiltinRegistry()))
+                , Types_(MakeIntrusive<NYql::TTypeAnnotationContext>())
+            {
+            }
+            // Builds the context from the evaluated expression; may throw (see Optimized).
+            TYqlContext Analyze(NYql::TExprNode::TPtr root, NYql::TExprContext& ctx) const override {
+                root = Optimized(std::move(root), ctx);
+
+                TYqlContext yqlCtx;
+
+                yqlCtx.TablesByCluster = CollectTablesByCluster(*root);
+                // operator[] is used only for its side effect: a cluster with no tables still gets a key (presumably default-inserted - confirm THashMap semantics).
+                for (TString cluster : CollectClusters(*root)) {
+                    Y_UNUSED(yqlCtx.TablesByCluster[std::move(cluster)]);
+                }
+
+                return yqlCtx;
+            }
+
+        private:
+            NYql::TExprNode::TPtr Optimized(NYql::TExprNode::TPtr expr, NYql::TExprContext& ctx) const {
+                constexpr size_t AttemptsLimit = 128;
+                // Re-run EvaluateExpression while it asks for a repeat; any other level must be Ok.
+                for (size_t i = 0; i < AttemptsLimit; ++i) {
+                    auto status = NYql::EvaluateExpression(expr, expr, *Types_, ctx, *FunctionRegistry_);
+                    if (status.Level != NYql::IGraphTransformer::TStatus::Repeat) {
+                        Y_ENSURE(status == NYql::IGraphTransformer::TStatus::Ok, "" << status);
+                        return expr;
+                    }
+                }
+                // Still repeating after AttemptsLimit rounds: give up with an exception.
+                ythrow yexception() << "Optimization was not converged after "
+                                    << AttemptsLimit << " attempts";
+            }
+            // Pretty-prints an expression as AST text. NOTE(review): no caller is visible in this diff - looks like a debugging aid.
+            static void Print(IOutputStream& out, const NYql::TExprNode& root, NYql::TExprContext& ctx) {
+                auto ast = ConvertToAst(root, ctx, NYql::TExprAnnotationFlags::None, true);
+                ast.Root->PrettyPrintTo(out, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
+            }
+
+            TIntrusivePtr<NKikimr::NMiniKQL::IFunctionRegistry> FunctionRegistry_;
+            NYql::TTypeAnnotationContextPtr Types_;
+        };
+
+    } // namespace
+    // Factory for the default analysis implementation.
+    IYqlAnalysis::TPtr MakeYqlAnalysis() {
+        return new TYqlAnalysis();
+    }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/yql.h b/yql/essentials/sql/v1/complete/analysis/yql/yql.h
new file mode 100644
index 00000000000..6c97246885e
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/yql.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_expr.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/hash.h> // NOTE(review): THashSet is used below but <util/generic/hash_set.h> is not included here - presumably transitive; confirm
+#include <util/generic/maybe.h>
+
+namespace NSQLComplete {
+    // Result of analysing a query: the tables it references, keyed by cluster.
+    struct TYqlContext {
+        THashMap<TString, THashSet<TString>> TablesByCluster;
+        // Keys of TablesByCluster as a set.
+        THashSet<TString> Clusters() const;
+    };
+    // Ref-counted analysis interface with two entry points: a compiled expression or a raw AST.
+    class IYqlAnalysis: public TThrRefBase {
+    public:
+        using TPtr = TIntrusivePtr<IYqlAnalysis>;
+        // The expression overload is the customisation point; the AST overload compiles and forwards to it.
+        virtual TYqlContext Analyze(NYql::TExprNode::TPtr root, NYql::TExprContext& ctx) const = 0;
+        TMaybe<TYqlContext> Analyze(NYql::TAstNode& root, NYql::TIssues& issues) const;
+    };
+    // Creates the default implementation.
+    IYqlAnalysis::TPtr MakeYqlAnalysis();
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp b/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp
new file mode 100644
index 00000000000..3d064e4673b
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/analysis/yql/yql_ut.cpp
@@ -0,0 +1,74 @@
+#include "yql.h"
+
+#include <yql/essentials/ast/yql_ast.h>
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+
+#include <yql/essentials/sql/settings/translation_settings.h>
+#include <yql/essentials/sql/v1/sql.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
+#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NSQLComplete;
+// Test helper: translates SQL v1 text to a YQL AST with two clusters ("socrates", "plato") mapped to the YT provider.
+class TSQLv1Parser {
+public:
+    TSQLv1Parser() {
+        Settings_.Arena = &Arena_;
+        Settings_.ClusterMapping = {
+            {"socrates", TString(NYql::YtProviderName)},
+            {"plato", TString(NYql::YtProviderName)},
+        };
+        Settings_.SyntaxVersion = 1;
+
+        Lexers_.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+        Parsers_.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
+    }
+    // Translates `query`; a failed translation trips Y_ENSURE instead of being returned to the caller.
+    NYql::TAstParseResult Parse(const TString& query) {
+        Arena_.Reset();
+
+        auto result = NSQLTranslationV1::SqlToYql(Lexers_, Parsers_, query, Settings_);
+        Y_ENSURE(result.IsOk());
+        return result;
+    }
+
+private:
+    google::protobuf::Arena Arena_;
+    NSQLTranslation::TTranslationSettings Settings_;
+    NSQLTranslationV1::TLexers Lexers_;
+    NSQLTranslationV1::TParsers Parsers_;
+};
+// Parses `query` and runs the default analysis; the TMaybe result is dereferenced unconditionally, so the test assumes analysis succeeds.
+TYqlContext Analyze(const TString& query) {
+    auto ast = TSQLv1Parser().Parse(query);
+
+    NYql::TIssues issues;
+    return *MakeYqlAnalysis()->Analyze(*ast.Root, issues);
+}
+
+Y_UNIT_TEST_SUITE(YqlAnalysisTests) {
+    // Tables read, created and written must be grouped under the cluster they are addressed through.
+    Y_UNIT_TEST(NamesAreCollected) {
+        TString input = R"(
+            USE yt:socrates;
+
+            SELECT * FROM Input;
+
+            CREATE TABLE Newbie (x Unit);
+
+            INSERT INTO plato.Input (id) VALUES (1);
+        )";
+        // Unqualified names are expected under the `USE` cluster; `plato.Input` under "plato".
+        THashMap<TString, THashSet<TString>> expected = {
+            {"socrates", {"Input", "Newbie"}},
+            {"plato", {"Input"}},
+        };
+
+        UNIT_ASSERT_VALUES_EQUAL(Analyze(input).TablesByCluster, expected);
+    }
+
+} // Y_UNIT_TEST_SUITE(YqlAnalysisTests)
diff --git
a/yql/essentials/sql/v1/complete/check/check_complete.cpp b/yql/essentials/sql/v1/complete/check/check_complete.cpp
new file mode 100644
index 00000000000..e91ddf14331
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/check/check_complete.cpp
@@ -0,0 +1,109 @@
+#include "check_complete.h"
+
+#include <yql/essentials/sql/v1/complete/sql_complete.h>
+#include <yql/essentials/sql/v1/complete/analysis/yql/yql.h>
+#include <yql/essentials/sql/v1/complete/name/cluster/static/discovery.h>
+#include <yql/essentials/sql/v1/complete/name/object/simple/static/schema.h>
+#include <yql/essentials/sql/v1/complete/name/service/cluster/name_service.h>
+#include <yql/essentials/sql/v1/complete/name/service/schema/name_service.h>
+#include <yql/essentials/sql/v1/complete/name/service/static/name_service.h>
+#include <yql/essentials/sql/v1/complete/name/service/union/name_service.h>
+
+#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
+#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
+
+#include <util/charset/utf8.h>
+#include <util/random/random.h>
+
+namespace NSQLComplete {
+
+    namespace {
+        // Supplies lexers built from the ANTLR4 "pure" factories (default and ANSI); the lambda owns its copy of the factory set.
+        TLexerSupplier MakePureLexerSupplier() {
+            NSQLTranslationV1::TLexers lexers;
+            lexers.Antlr4Pure = NSQLTranslationV1::MakeAntlr4PureLexerFactory();
+            lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory();
+            return [lexers = std::move(lexers)](bool ansi) {
+                return NSQLTranslationV1::MakeLexer(
+                    lexers, ansi, /* antlr4 = */ true,
+                    NSQLTranslationV1::ELexerFlavor::Pure);
+            };
+        }
+        // Cluster name service over the clusters of `ctx`, passed to the static discovery in sorted order.
+        INameService::TPtr MakeClusterNameService(const TYqlContext& ctx) {
+            THashSet<TString> clusterSet = ctx.Clusters();
+            TVector<TString> clusterVec(begin(clusterSet), end(clusterSet));
+            Sort(clusterVec);
+
+            return MakeClusterNameService(MakeStaticClusterDiscovery(std::move(clusterVec)));
+        }
+        // Schema name service in which every table of `ctx` is a Table entry under folder "/" of its cluster.
+        INameService::TPtr MakeSchemaNameService(const TYqlContext& ctx) {
+            THashMap<TString, THashMap<TString, TVector<TFolderEntry>>> fs;
+            for (const auto& [cluster, tables] : ctx.TablesByCluster) {
+                for (TString table : tables) { // by value on purpose: the copy is moved into the entry below
+                    fs[cluster]["/"].push_back(TFolderEntry{
+                        .Type = TFolderEntry::Table,
+                        .Name = std::move(table),
+                    });
+                }
+            }
+
+            return MakeSchemaNameService(MakeSimpleSchema(MakeStaticSimpleSchema(std::move(fs))));
+        }
+
+    } // namespace
+    // Runs completion at pseudo-random cursor positions of `query`; always returns true, so a failure can only surface as an exception.
+    bool CheckComplete(TStringBuf query, TYqlContext ctx) {
+        constexpr size_t Seed = 97651231;
+        constexpr size_t Attempts = 64;
+        constexpr size_t MaxAttempts = 256;
+        SetRandomSeed(Seed); // fixed seed: the same positions are drawn on every call
+
+        auto service = MakeUnionNameService(
+            {
+                MakeClusterNameService(ctx),
+                MakeSchemaNameService(ctx),
+            },
+            MakeDefaultRanking());
+
+        auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(service));
+        // `i` counts completed probes, `j` counts draws; skipped positions consume a draw but not a probe.
+        for (size_t i = 0, j = 0; i < Attempts && j < MaxAttempts; ++j) {
+            size_t pos = RandomNumber<size_t>(query.size() + 1);
+            if (pos < query.size() && IsUTF8ContinuationByte(query.at(pos))) {
+                continue;
+            }
+
+            TCompletionInput input = {
+                .Text = query,
+                .CursorPosition = pos,
+            };
+
+            auto output = engine->CompleteAsync(input).ExtractValueSync();
+            Y_DO_NOT_OPTIMIZE_AWAY(output);
+
+            i += 1;
+        }
+
+        return true;
+    }
+    // Expression entry point: any exception from analysis or completion becomes an issue and a `false` result.
+    bool CheckComplete(TStringBuf query, NYql::TExprNode::TPtr root, NYql::TExprContext& ctx, NYql::TIssues& issues) try {
+        return CheckComplete(query, MakeYqlAnalysis()->Analyze(root, ctx));
+    } catch (...) {
+        issues.AddIssue(CurrentExceptionMessage());
+        return false;
+    }
+    // AST entry point: returns false when analysis yields no context or when anything throws.
+    bool CheckComplete(TStringBuf query, NYql::TAstNode& root, NYql::TIssues& issues) try {
+        return MakeYqlAnalysis()
+            ->Analyze(root, issues)
+            .Transform([&](auto&& ctx) { return CheckComplete(query, std::move(ctx)); })
+            .GetOrElse(false);
+    } catch (...) {
+        issues.AddIssue(CurrentExceptionMessage());
+        return false;
+    }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/check/check_complete.h b/yql/essentials/sql/v1/complete/check/check_complete.h
new file mode 100644
index 00000000000..8ec0af40c5e
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/check/check_complete.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_ast.h>
+#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/public/issue/yql_issue.h>
+
+#include <util/generic/string.h>
+#include <util/generic/hash.h>
+#include <util/generic/vector.h>
+
+namespace NSQLComplete {
+    // Smoke-checks completion over `query` using names from the compiled expression; failures are reported via `issues`.
+    bool CheckComplete(
+        TStringBuf query,
+        NYql::TExprNode::TPtr root,
+        NYql::TExprContext& ctx,
+        NYql::TIssues& issues);
+    // Same check starting from an AST.
+    bool CheckComplete(TStringBuf query, NYql::TAstNode& root, NYql::TIssues& issues);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/check/ya.make b/yql/essentials/sql/v1/complete/check/ya.make
new file mode 100644
index 00000000000..2a4d577fb51
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/check/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+SRCS(
+    check_complete.cpp
+)
+
+PEERDIR(
+    yql/essentials/sql/v1/complete
+    yql/essentials/sql/v1/complete/analysis/yql
+    yql/essentials/sql/v1/complete/name/cluster/static
+    yql/essentials/sql/v1/complete/name/object/simple/static
+    yql/essentials/sql/v1/complete/name/service/cluster
+    yql/essentials/sql/v1/complete/name/service/schema
+    yql/essentials/sql/v1/complete/name/service/static
+    yql/essentials/sql/v1/complete/name/service/union
+    yql/essentials/sql/v1/lexer/antlr4_pure
+    yql/essentials/sql/v1/lexer/antlr4_pure_ansi
+    yql/essentials/ast
+)
+
+END()
diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make
index a1151d28142..e8ea648222f 100644
--- a/yql/essentials/sql/v1/complete/ya.make
+++ b/yql/essentials/sql/v1/complete/ya.make
@@ -23,6 +23,7 @@ END()
 RECURSE(
     analysis
     antlr4
+    check
     core
     name
syntax |