diff options
author | aozeritsky <aozeritsky@ydb.tech> | 2023-08-25 17:21:18 +0300 |
---|---|---|
committer | aozeritsky <aozeritsky@ydb.tech> | 2023-08-25 17:42:52 +0300 |
commit | 1574ae59ebf39c4e8ba58b14f9b740bcd7fae56e (patch) | |
tree | 00b31722491107b9c2840bdb5966bc98bd0dd121 | |
parent | e841c3be45ce65bf9b0f9b9402acdcbff444cf4f (diff) | |
download | ydb-1574ae59ebf39c4e8ba58b14f9b740bcd7fae56e.tar.gz |
Propagate statistics to AST nodes
26 files changed, 258 insertions, 1 deletions
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index e20e951518..e612cd70f6 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -5666,6 +5666,57 @@ TExprNode::TPtr ExpandConstraintsOf(const TExprNode::TPtr& node, TExprContext& c .Build(); } +TExprNode::TPtr ExpandCostsOf(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) { + YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << node->Content(); + + TString json; + TStringOutput out(json); + NJson::TJsonWriter jsonWriter(&out, true); + + VisitExpr(node, [&](const TExprNode::TPtr& node) { + auto stat = typesCtx.GetStats(node.Get()); + + if (stat || node->ChildrenSize()) { + jsonWriter.OpenMap(); + jsonWriter.WriteKey("Name"); + jsonWriter.Write(node->Content()); + if (stat) { + if (stat->Cost) { + jsonWriter.WriteKey("Cost"); + jsonWriter.Write(*stat->Cost); + } + jsonWriter.WriteKey("Cols"); + jsonWriter.Write(stat->Ncols); + jsonWriter.WriteKey("Rows"); + jsonWriter.Write(stat->Nrows); + } + if (node->ChildrenSize()) { + jsonWriter.WriteKey("Children"); + jsonWriter.OpenArray(); + } + } + return true; + }, [&](const TExprNode::TPtr& node) { + auto stat = typesCtx.GetStats(node.Get()); + + if (stat || node->ChildrenSize()) { + if (node->ChildrenSize()) { + jsonWriter.CloseArray(); + } + jsonWriter.CloseMap(); + } + return true; + }); + + jsonWriter.Flush(); + + return ctx.Builder(node->Pos()) + .Callable("Json") + .Atom(0, json, TNodeFlags::MultilineContent) + .Seal() + .Build(); +} + TExprNode::TPtr OptimizeMapJoinCore(const TExprNode::TPtr& node, TExprContext& ctx) { if (const auto& input = node->Head(); input.IsCallable("NarrowMap") && input.Tail().Tail().IsCallable("AsStruct")) { YQL_CLOG(DEBUG, CorePeepHole) << "Swap " << node->Content() << " with " << input.Content(); @@ -7200,6 +7251,7 @@ struct TPeepHoleRules { {"AggregateMergeFinalize", &ExpandAggregatePeephole}, {"AggregateMergeManyFinalize", &ExpandAggregatePeephole}, {"AggregateFinalize", &ExpandAggregatePeephole}, + {"CostsOf", &ExpandCostsOf}, {"JsonQuery", &ExpandJsonQuery}, }; diff --git a/ydb/library/yql/core/services/yql_transform_pipeline.cpp b/ydb/library/yql/core/services/yql_transform_pipeline.cpp index 719d35a306..bc3dc7ec0c 100644 --- a/ydb/library/yql/core/services/yql_transform_pipeline.cpp +++ b/ydb/library/yql/core/services/yql_transform_pipeline.cpp @@ -162,6 +162,11 @@ TTransformationPipeline& TTransformationPipeline::AddOptimization(bool checkWorl "RecaptureDataProposals", issueCode)); Transformers_.push_back(TTransformStage( + CreateStatisticsProposalsInspector(*TypeAnnotationContext_, TString{DqProviderName}), + "StatisticsProposals", + issueCode + )); + Transformers_.push_back(TTransformStage( CreateLogicalDataProposalsInspector(*TypeAnnotationContext_), "LogicalDataProposals", issueCode)); diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 00c767a781..bbef3d080d 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -6230,6 +6230,16 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Ok; } + IGraphTransformer::TStatus CostsOfWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 1, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(ctx.Expr.MakeType<TDataExprType>(EDataSlot::Json)); + return IGraphTransformer::TStatus::Ok; + } + IGraphTransformer::TStatus InstanceOfWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { Y_UNUSED(output); if (!EnsureArgsCount(*input, 1, ctx.Expr)) { @@ -11771,6 +11781,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["HasNull"] = &HasNullWrapper; Functions["TypeOf"] = &TypeOfWrapper; Functions["ConstraintsOf"] = &ConstraintsOfWrapper; + Functions["CostsOf"] = &CostsOfWrapper; Functions["InstanceOf"] = &InstanceOfWrapper; Functions["SourceOf"] = &SourceOfWrapper; Functions["MatchType"] = &MatchTypeWrapper; diff --git a/ydb/library/yql/core/yql_data_provider.h b/ydb/library/yql/core/yql_data_provider.h index 5455d160de..2fc835a47a 100644 --- a/ydb/library/yql/core/yql_data_provider.h +++ b/ydb/library/yql/core/yql_data_provider.h @@ -126,6 +126,7 @@ public: //-- optimizations virtual TExprNode::TPtr RewriteIO(const TExprNode::TPtr& node, TExprContext& ctx) = 0; virtual IGraphTransformer& GetRecaptureOptProposalTransformer() = 0; + virtual IGraphTransformer& GetStatisticsProposalTransformer() = 0; virtual IGraphTransformer& GetLogicalOptProposalTransformer() = 0; virtual IGraphTransformer& GetPhysicalOptProposalTransformer() = 0; virtual IGraphTransformer& GetPhysicalFinalizingTransformer() = 0; diff --git a/ydb/library/yql/core/yql_opt_proposed_by_data.cpp b/ydb/library/yql/core/yql_opt_proposed_by_data.cpp index 3b1e53663d..d9653a2ea2 100644 --- a/ydb/library/yql/core/yql_opt_proposed_by_data.cpp +++ b/ydb/library/yql/core/yql_opt_proposed_by_data.cpp @@ -406,4 +406,14 @@ TAutoPtr<IGraphTransformer> CreateRecaptureDataProposalsInspector(const TTypeAnn ); } +TAutoPtr<IGraphTransformer> CreateStatisticsProposalsInspector(const TTypeAnnotationContext& types, const TString& provider) { + return CreateSpecificDataProposalsInspector<ESource::DataSource>( + types, + provider, + [](IDataProvider* provider) -> IGraphTransformer& { + return provider->GetStatisticsProposalTransformer(); + } + ); +} + } // namespace NYql diff --git a/ydb/library/yql/core/yql_opt_proposed_by_data.h b/ydb/library/yql/core/yql_opt_proposed_by_data.h index 0ec0bcab43..dbc3035adc 100644 --- a/ydb/library/yql/core/yql_opt_proposed_by_data.h +++ b/ydb/library/yql/core/yql_opt_proposed_by_data.h @@ -14,6 +14,7 @@ TAutoPtr<IGraphTransformer> CreateConfigureTransformer(const TTypeAnnotationCont TAutoPtr<IGraphTransformer> CreateIODiscoveryTransformer(const TTypeAnnotationContext& types); TAutoPtr<IGraphTransformer> CreateEpochsTransformer(const TTypeAnnotationContext& types); TAutoPtr<IGraphTransformer> CreateRecaptureDataProposalsInspector(const TTypeAnnotationContext& types, const TString& provider); +TAutoPtr<IGraphTransformer> CreateStatisticsProposalsInspector(const TTypeAnnotationContext& types, const TString& provider); TAutoPtr<IGraphTransformer> CreateLogicalDataProposalsInspector(const TTypeAnnotationContext& types); TAutoPtr<IGraphTransformer> CreatePhysicalDataProposalsInspector(const TTypeAnnotationContext& types); TAutoPtr<IGraphTransformer> CreatePhysicalFinalizers(const TTypeAnnotationContext& types); diff --git a/ydb/library/yql/core/yql_type_annotation.cpp b/ydb/library/yql/core/yql_type_annotation.cpp index 1c63948080..6082a1db95 100644 --- a/ydb/library/yql/core/yql_type_annotation.cpp +++ b/ydb/library/yql/core/yql_type_annotation.cpp @@ -53,6 +53,7 @@ void TTypeAnnotationContext::Reset() { ExpectedTypes.clear(); ExpectedConstraints.clear(); ExpectedColumnOrders.clear(); + StatisticsMap.clear(); } TString FormatColumnOrder(const TMaybe<TColumnOrder>& columnOrder) { diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h index ef857a4cb5..bffbac61fa 100644 --- a/ydb/library/yql/core/yql_type_annotation.h +++ b/ydb/library/yql/core/yql_type_annotation.h @@ -236,6 +236,7 @@ struct TTypeAnnotationContext: public TThrRefBase { bool UseBlocks = false; bool PgEmitAggApply = false; IArrowResolver::TPtr ArrowResolver; + TString CostBasedOptimizerType; // compatibility with v0 or raw s-expression code bool OrderedColumns = false; diff --git a/ydb/library/yql/dq/integration/yql_dq_integration.h b/ydb/library/yql/dq/integration/yql_dq_integration.h index b34880f637..d4f45d7f16 100644 --- a/ydb/library/yql/dq/integration/yql_dq_integration.h +++ b/ydb/library/yql/dq/integration/yql_dq_integration.h @@ -2,6 +2,7 @@ #include <ydb/library/yql/ast/yql_expr.h> #include <ydb/library/yql/core/yql_data_provider.h> +#include <ydb/library/yql/core/yql_statistics.h> #include <ydb/library/yql/dq/tasks/dq_tasks_graph.h> #include <ydb/library/yql/public/issue/yql_issue.h> @@ -40,10 +41,11 @@ public: virtual ui64 Partition(const TDqSettings& config, size_t maxPartitions, const TExprNode& node, TVector<TString>& partitions, TString* clusterName, TExprContext& ctx, bool canFallback) = 0; - virtual bool CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues = false) { Y_UNUSED(skipIssues); Y_UNUSED(node); Y_UNUSED(ctx); return true; } + virtual bool CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues = false) = 0; virtual bool CanRead(const TExprNode& read, TExprContext& ctx, bool skipIssues = true) = 0; virtual TMaybe<ui64> EstimateReadSize(ui64 dataSizePerJob, ui32 maxTasksPerStage, const TVector<const TExprNode*>& nodes, TExprContext& ctx) = 0; virtual TExprNode::TPtr WrapRead(const TDqSettings& config, const TExprNode::TPtr& read, TExprContext& ctx) = 0; + virtual TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& readWrap, TExprContext& ctx) = 0; // Nothing if callable is not for writing, // false if callable is for writing and there are some errors (they are added to ctx), diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp index 451ecf606c..a66cac44f2 100644 --- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp +++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp @@ -7,6 +7,13 @@ ui64 TDqIntegrationBase::Partition(const TDqSettings&, size_t, const TExprNode&, return 0; } +bool TDqIntegrationBase::CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues) { + Y_UNUSED(skipIssues); + Y_UNUSED(node); + Y_UNUSED(ctx); + return true; +} + bool TDqIntegrationBase::CanRead(const TExprNode&, TExprContext&, bool) { return false; } @@ -19,6 +26,12 @@ TExprNode::TPtr TDqIntegrationBase::WrapRead(const TDqSettings&, const TExprNode return read; } +TMaybe<TOptimizerStatistics> TDqIntegrationBase::ReadStatistics(const TExprNode::TPtr& readWrap, TExprContext& ctx) { + Y_UNUSED(readWrap); + Y_UNUSED(ctx); + return Nothing(); +} + TMaybe<bool> TDqIntegrationBase::CanWrite(const TExprNode&, TExprContext&) { return Nothing(); } diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h index 714aa124dc..7273d5a25f 100644 --- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h +++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h @@ -8,9 +8,11 @@ class TDqIntegrationBase: public IDqIntegration { public: ui64 Partition(const TDqSettings& config, size_t maxPartitions, const TExprNode& node, TVector<TString>& partitions, TString* clusterName, TExprContext& ctx, bool canFallback) override; + bool CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues) override; bool CanRead(const TExprNode& read, TExprContext& ctx, bool skipIssues) override; TMaybe<ui64> EstimateReadSize(ui64 dataSizePerJob, ui32 maxTasksPerStage, const TVector<const TExprNode*>& nodes, TExprContext& ctx) override; TExprNode::TPtr WrapRead(const TDqSettings& config, const TExprNode::TPtr& read, TExprContext& ctx) override; + TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& readWrap, TExprContext& ctx) override; void RegisterMkqlCompiler(NCommon::TMkqlCallableCompilerBase& compiler) override; TMaybe<bool> CanWrite(const TExprNode& write, TExprContext& ctx) override; TExprNode::TPtr WrapWrite(const TExprNode::TPtr& write, TExprContext& ctx) override; diff --git a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp index c04c6e4d87..f4e8a8ce1b 100644 --- a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp +++ b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp @@ -165,6 +165,7 @@ void TDataProviderBase::Reset() { } } GetRecaptureOptProposalTransformer().Rewind(); + GetStatisticsProposalTransformer().Rewind(); GetLogicalOptProposalTransformer().Rewind(); GetPhysicalOptProposalTransformer().Rewind(); GetPhysicalFinalizingTransformer().Rewind(); @@ -179,6 +180,10 @@ IGraphTransformer& TDataProviderBase::GetRecaptureOptProposalTransformer() { return NullTransformer_; } +IGraphTransformer& TDataProviderBase::GetStatisticsProposalTransformer() { + return NullTransformer_; +} + IGraphTransformer& TDataProviderBase::GetLogicalOptProposalTransformer() { return NullTransformer_; } diff --git a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h index d1bfd4b027..445ffa1c18 100644 --- a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h +++ b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h @@ -58,6 +58,7 @@ public: void PostRewriteIO() override; void Reset() override; IGraphTransformer& GetRecaptureOptProposalTransformer() override; + IGraphTransformer& GetStatisticsProposalTransformer() override; IGraphTransformer& GetLogicalOptProposalTransformer() override; IGraphTransformer& GetPhysicalOptProposalTransformer() override; IGraphTransformer& GetPhysicalFinalizingTransformer() override; diff --git a/ydb/library/yql/providers/config/yql_config_provider.cpp b/ydb/library/yql/providers/config/yql_config_provider.cpp index 794fa85580..1f93ad2307 100644 --- a/ydb/library/yql/providers/config/yql_config_provider.cpp +++ b/ydb/library/yql/providers/config/yql_config_provider.cpp @@ -810,6 +810,24 @@ namespace { Types.PgEmitAggApply = (name == "PgEmitAggApply"); } + else if (name == "CostBasedOptimizer") { + if (args.size() != 1) { + ctx.AddError(TIssue(pos, TStringBuilder() << "Expected at most 1 argument, but got " << args.size())); + return false; + } + auto arg = TString{args[0]}; + + if (arg == "PG") { + Types.CostBasedOptimizerType = arg; + } else if (arg == "DPccp") { + Types.CostBasedOptimizerType = arg; + } else if (arg == "disable") { + ; + } else { + ctx.AddError(TIssue(pos, TStringBuilder() << "Expected `disable|PG|DPccp', but got: " << args[0])); + return false; + } + } else { ctx.AddError(TIssue(pos, TStringBuilder() << "Unsupported command: " << name)); return false; diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt index e192c1673e..899857894e 100644 --- a/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt @@ -56,6 +56,7 @@ target_sources(providers-dq-provider PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp ) diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt index 86469196fc..3d2343c2a7 100644 --- a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt @@ -57,6 +57,7 @@ target_sources(providers-dq-provider PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp ) diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt index 86469196fc..3d2343c2a7 100644 --- a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt @@ -57,6 +57,7 @@ target_sources(providers-dq-provider PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp ) diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt index e192c1673e..899857894e 100644 --- a/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt @@ -56,6 +56,7 @@ target_sources(providers-dq-provider PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp ) diff --git a/ydb/library/yql/providers/dq/provider/ya.make b/ydb/library/yql/providers/dq/provider/ya.make index 67b4edb3e5..5765c684d4 100644 --- a/ydb/library/yql/providers/dq/provider/ya.make +++ b/ydb/library/yql/providers/dq/provider/ya.make @@ -19,6 +19,8 @@ SRCS( yql_dq_datasource.h yql_dq_recapture.cpp yql_dq_recapture.h + yql_dq_statistics.cpp + yql_dq_statistics.h yql_dq_statistics_json.cpp yql_dq_statistics_json.h yql_dq_validate.cpp diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp index 49c97fbf83..7a4cb98e69 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp @@ -3,6 +3,7 @@ #include "yql_dq_datasource_type_ann.h" #include "yql_dq_state.h" #include "yql_dq_validate.h" +#include "yql_dq_statistics.h" #include <ydb/library/yql/providers/common/config/yql_configuration_transformer.h> #include <ydb/library/yql/providers/common/provider/yql_data_provider_impl.h> @@ -48,6 +49,7 @@ public: , ExecTransformer_([this, execTransformerFactory] () { return THolder<IGraphTransformer>(execTransformerFactory(State_)); }) , TypeAnnotationTransformer_([] () { return CreateDqsDataSourceTypeAnnotationTransformer(); }) , ConstraintsTransformer_([] () { return CreateDqDataSourceConstraintTransformer(); }) + , StatisticsTransformer_([this]() { return CreateDqsStatisticsTransformer(State_); }) { } TStringBuf GetName() const override { @@ -68,6 +70,10 @@ public: return *ConfigurationTransformer_; } + IGraphTransformer& GetStatisticsProposalTransformer() override { + return *StatisticsTransformer_; + } + bool CanBuildResultImpl(const TExprNode& node, TNodeSet& visited) { if (!visited.emplace(&node).second) { return true; @@ -268,6 +274,7 @@ private: TLazyInitHolder<IGraphTransformer> ExecTransformer_; TLazyInitHolder<TVisitorTransformerBase> TypeAnnotationTransformer_; TLazyInitHolder<IGraphTransformer> ConstraintsTransformer_; + TLazyInitHolder<IGraphTransformer> StatisticsTransformer_; }; } diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp new file mode 100644 index 0000000000..e2aa1f4ba6 --- /dev/null +++ b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp @@ -0,0 +1,68 @@ +#include "yql_dq_statistics.h" +#include "yql_dq_state.h" + +#include <ydb/library/yql/dq/opt/dq_opt_stat.h> +#include <ydb/library/yql/dq/integration/yql_dq_integration.h> +#include <ydb/library/yql/core/yql_expr_optimize.h> + +#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h> + +namespace NYql { + +using namespace NNodes; + +class TDqsStatisticsTransformer : public TSyncTransformerBase { +public: + TDqsStatisticsTransformer(const TDqStatePtr& state) + : State(state) + { } + + IGraphTransformer::TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + if (!State->TypeCtx->CostBasedOptimizerType) { + return IGraphTransformer::TStatus::Ok; + } + + TOptimizeExprSettings settings(nullptr); + + auto ret = OptimizeExpr(input, output, [*this](const TExprNode::TPtr& input, TExprContext& ctx) { + Y_UNUSED(ctx); + auto output = input; + + if (TCoFlatMap::Match(input.Get())){ + NDq::InferStatisticsForFlatMap(input, State->TypeCtx); + } else if(TCoSkipNullMembers::Match(input.Get())){ + NDq::InferStatisticsForSkipNullMembers(input, State->TypeCtx); + } else if (TDqReadWrapBase::Match(input.Get())) { + auto read = input->Child(TDqReadWrapBase::idx_Input); + auto dataSourceChildIndex = 1; + YQL_ENSURE(read->ChildrenSize() > 1); + YQL_ENSURE(read->Child(dataSourceChildIndex)->IsCallable("DataSource")); + auto dataSourceName = read->Child(dataSourceChildIndex)->Child(0)->Content(); + auto datasource = State->TypeCtx->DataSourceMap.FindPtr(dataSourceName); + YQL_ENSURE(datasource); + if (auto dqIntegration = (*datasource)->GetDqIntegration()) { + auto stat = dqIntegration->ReadStatistics(read, ctx); + if (stat) { + State->TypeCtx->SetStats(input.Get(), std::move(std::make_shared<TOptimizerStatistics>(*stat))); + } + } + } + return output; + }, ctx, settings); + + return ret; + } + + void Rewind() { } + +private: + TDqStatePtr State; +}; + +THolder<IGraphTransformer> CreateDqsStatisticsTransformer(TDqStatePtr state) { + return MakeHolder<TDqsStatisticsTransformer>(state); +} + +} // namespace NYql diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.h b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.h new file mode 100644 index 0000000000..460b363bf4 --- /dev/null +++ b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.h @@ -0,0 +1,14 @@ +#pragma once + +#include <ydb/library/yql/core/yql_graph_transformer.h> + +#include <util/generic/ptr.h> + +namespace NYql { + +struct TDqState; +using TDqStatePtr = TIntrusivePtr<TDqState>; + +THolder<IGraphTransformer> CreateDqsStatisticsTransformer(TDqStatePtr state); + +} // namespace NYql diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp index 7ca72f87cd..896200ced6 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp @@ -344,6 +344,29 @@ public: return false; } + TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& read, TExprContext& ctx) override { + Y_UNUSED(ctx); + TOptimizerStatistics stat(0, 0); + if (auto maybeRead = TMaybeNode<TYtReadTable>(read)) { + auto input = maybeRead.Cast().Input(); + for (auto section: input) { + for (const auto& path: section.Paths()) { + auto pathInfo = MakeIntrusive<TYtPathInfo>(path); + auto tableInfo = pathInfo->Table; + YQL_ENSURE(tableInfo); + + if (tableInfo->Stat) { + stat.Nrows += tableInfo->Stat->RecordsCount; + } + if (pathInfo->Columns && pathInfo->Columns->GetColumns()) { + stat.Ncols += pathInfo->Columns->GetColumns()->size(); + } + } + } + } + return stat; + } + TMaybe<ui64> EstimateReadSize(ui64 dataSizePerJob, ui32 maxTasksPerStage, const TVector<const TExprNode*>& nodes, TExprContext& ctx) override { TVector<bool> hasErasurePerNode; hasErasurePerNode.reserve(nodes.size()); diff --git a/ydb/library/yql/sql/v1/context.h b/ydb/library/yql/sql/v1/context.h index 980d7e262d..8896c1cb1a 100644 --- a/ydb/library/yql/sql/v1/context.h +++ b/ydb/library/yql/sql/v1/context.h @@ -256,6 +256,7 @@ namespace NSQLTranslationV1 { bool EnableSystemColumns = true; bool DqEngineEnable = false; bool DqEngineForce = false; + TString CostBasedOptimizer; TMaybe<bool> JsonQueryReturnsJsonDocument; TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections; TMaybe<bool> AnsiRankForNullableKeys = true; diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index 08c4ce616a..809154d8e7 100644 --- a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -2479,6 +2479,11 @@ public: BuildQuotedAtom(Pos, "DqEngine"), BuildQuotedAtom(Pos, mode)))); } + if (ctx.CostBasedOptimizer) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "CostBasedOptimizer"), BuildQuotedAtom(Pos, ctx.CostBasedOptimizer)))); + } + if (ctx.JsonQueryReturnsJsonDocument.Defined()) { TString pragmaName = "DisableJsonQueryReturnsJsonDocument"; if (*ctx.JsonQueryReturnsJsonDocument) { diff --git a/ydb/library/yql/sql/v1/sql_query.cpp b/ydb/library/yql/sql/v1/sql_query.cpp index b8ced318ce..d0d78ea430 100644 --- a/ydb/library/yql/sql/v1/sql_query.cpp +++ b/ydb/library/yql/sql/v1/sql_query.cpp @@ -1974,6 +1974,16 @@ TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success } else if (normalizedPragma == "disablecompactgroupby") { Ctx.CompactGroupBy = false; Ctx.IncrementMonCounter("sql_pragma", "DisableCompactGroupBy"); + } else if (normalizedPragma == "costbasedoptimizer") { + Ctx.IncrementMonCounter("sql_pragma", "CostBasedOptimizer"); + if (values.size() != 1 || !values[0].GetLiteral() + || ! (*values[0].GetLiteral() == "disable" || *values[0].GetLiteral() == "PG" || *values[0].GetLiteral() == "DPccp")) + { + Error() << "Expected `disable|PG|DPccp' argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.CostBasedOptimizer = *values[0].GetLiteral(); } else { Error() << "Unknown pragma: " << pragma; Ctx.IncrementMonCounter("sql_errors", "UnknownPragma"); |