aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoraozeritsky <aozeritsky@ydb.tech>2023-08-25 17:21:18 +0300
committeraozeritsky <aozeritsky@ydb.tech>2023-08-25 17:42:52 +0300
commit1574ae59ebf39c4e8ba58b14f9b740bcd7fae56e (patch)
tree00b31722491107b9c2840bdb5966bc98bd0dd121
parente841c3be45ce65bf9b0f9b9402acdcbff444cf4f (diff)
downloadydb-1574ae59ebf39c4e8ba58b14f9b740bcd7fae56e.tar.gz
Propagate statistics to AST nodes
-rw-r--r--ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp52
-rw-r--r--ydb/library/yql/core/services/yql_transform_pipeline.cpp5
-rw-r--r--ydb/library/yql/core/type_ann/type_ann_core.cpp11
-rw-r--r--ydb/library/yql/core/yql_data_provider.h1
-rw-r--r--ydb/library/yql/core/yql_opt_proposed_by_data.cpp10
-rw-r--r--ydb/library/yql/core/yql_opt_proposed_by_data.h1
-rw-r--r--ydb/library/yql/core/yql_type_annotation.cpp1
-rw-r--r--ydb/library/yql/core/yql_type_annotation.h1
-rw-r--r--ydb/library/yql/dq/integration/yql_dq_integration.h4
-rw-r--r--ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp13
-rw-r--r--ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h2
-rw-r--r--ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp5
-rw-r--r--ydb/library/yql/providers/common/provider/yql_data_provider_impl.h1
-rw-r--r--ydb/library/yql/providers/config/yql_config_provider.cpp18
-rw-r--r--ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/library/yql/providers/dq/provider/ya.make2
-rw-r--r--ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp7
-rw-r--r--ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp68
-rw-r--r--ydb/library/yql/providers/dq/provider/yql_dq_statistics.h14
-rw-r--r--ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp23
-rw-r--r--ydb/library/yql/sql/v1/context.h1
-rw-r--r--ydb/library/yql/sql/v1/query.cpp5
-rw-r--r--ydb/library/yql/sql/v1/sql_query.cpp10
26 files changed, 258 insertions, 1 deletions
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
index e20e951518..e612cd70f6 100644
--- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -5666,6 +5666,57 @@ TExprNode::TPtr ExpandConstraintsOf(const TExprNode::TPtr& node, TExprContext& c
.Build();
}
+TExprNode::TPtr ExpandCostsOf(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) {
+ YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << node->Content();
+
+ TString json;
+ TStringOutput out(json);
+ NJson::TJsonWriter jsonWriter(&out, true);
+
+ VisitExpr(node, [&](const TExprNode::TPtr& node) {
+ auto stat = typesCtx.GetStats(node.Get());
+
+ if (stat || node->ChildrenSize()) {
+ jsonWriter.OpenMap();
+ jsonWriter.WriteKey("Name");
+ jsonWriter.Write(node->Content());
+ if (stat) {
+ if (stat->Cost) {
+ jsonWriter.WriteKey("Cost");
+ jsonWriter.Write(*stat->Cost);
+ }
+ jsonWriter.WriteKey("Cols");
+ jsonWriter.Write(stat->Ncols);
+ jsonWriter.WriteKey("Rows");
+ jsonWriter.Write(stat->Nrows);
+ }
+ if (node->ChildrenSize()) {
+ jsonWriter.WriteKey("Children");
+ jsonWriter.OpenArray();
+ }
+ }
+ return true;
+ }, [&](const TExprNode::TPtr& node) {
+ auto stat = typesCtx.GetStats(node.Get());
+
+ if (stat || node->ChildrenSize()) {
+ if (node->ChildrenSize()) {
+ jsonWriter.CloseArray();
+ }
+ jsonWriter.CloseMap();
+ }
+ return true;
+ });
+
+ jsonWriter.Flush();
+
+ return ctx.Builder(node->Pos())
+ .Callable("Json")
+ .Atom(0, json, TNodeFlags::MultilineContent)
+ .Seal()
+ .Build();
+}
+
TExprNode::TPtr OptimizeMapJoinCore(const TExprNode::TPtr& node, TExprContext& ctx) {
if (const auto& input = node->Head(); input.IsCallable("NarrowMap") && input.Tail().Tail().IsCallable("AsStruct")) {
YQL_CLOG(DEBUG, CorePeepHole) << "Swap " << node->Content() << " with " << input.Content();
@@ -7200,6 +7251,7 @@ struct TPeepHoleRules {
{"AggregateMergeFinalize", &ExpandAggregatePeephole},
{"AggregateMergeManyFinalize", &ExpandAggregatePeephole},
{"AggregateFinalize", &ExpandAggregatePeephole},
+ {"CostsOf", &ExpandCostsOf},
{"JsonQuery", &ExpandJsonQuery},
};
diff --git a/ydb/library/yql/core/services/yql_transform_pipeline.cpp b/ydb/library/yql/core/services/yql_transform_pipeline.cpp
index 719d35a306..bc3dc7ec0c 100644
--- a/ydb/library/yql/core/services/yql_transform_pipeline.cpp
+++ b/ydb/library/yql/core/services/yql_transform_pipeline.cpp
@@ -162,6 +162,11 @@ TTransformationPipeline& TTransformationPipeline::AddOptimization(bool checkWorl
"RecaptureDataProposals",
issueCode));
Transformers_.push_back(TTransformStage(
+ CreateStatisticsProposalsInspector(*TypeAnnotationContext_, TString{DqProviderName}),
+ "StatisticsProposals",
+ issueCode
+ ));
+ Transformers_.push_back(TTransformStage(
CreateLogicalDataProposalsInspector(*TypeAnnotationContext_),
"LogicalDataProposals",
issueCode));
diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp
index 00c767a781..bbef3d080d 100644
--- a/ydb/library/yql/core/type_ann/type_ann_core.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp
@@ -6230,6 +6230,16 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
return IGraphTransformer::TStatus::Ok;
}
+ IGraphTransformer::TStatus CostsOfWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
+ Y_UNUSED(output);
+ if (!EnsureArgsCount(*input, 1, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ input->SetTypeAnn(ctx.Expr.MakeType<TDataExprType>(EDataSlot::Json));
+ return IGraphTransformer::TStatus::Ok;
+ }
+
IGraphTransformer::TStatus InstanceOfWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
Y_UNUSED(output);
if (!EnsureArgsCount(*input, 1, ctx.Expr)) {
@@ -11771,6 +11781,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["HasNull"] = &HasNullWrapper;
Functions["TypeOf"] = &TypeOfWrapper;
Functions["ConstraintsOf"] = &ConstraintsOfWrapper;
+ Functions["CostsOf"] = &CostsOfWrapper;
Functions["InstanceOf"] = &InstanceOfWrapper;
Functions["SourceOf"] = &SourceOfWrapper;
Functions["MatchType"] = &MatchTypeWrapper;
diff --git a/ydb/library/yql/core/yql_data_provider.h b/ydb/library/yql/core/yql_data_provider.h
index 5455d160de..2fc835a47a 100644
--- a/ydb/library/yql/core/yql_data_provider.h
+++ b/ydb/library/yql/core/yql_data_provider.h
@@ -126,6 +126,7 @@ public:
//-- optimizations
virtual TExprNode::TPtr RewriteIO(const TExprNode::TPtr& node, TExprContext& ctx) = 0;
virtual IGraphTransformer& GetRecaptureOptProposalTransformer() = 0;
+ virtual IGraphTransformer& GetStatisticsProposalTransformer() = 0;
virtual IGraphTransformer& GetLogicalOptProposalTransformer() = 0;
virtual IGraphTransformer& GetPhysicalOptProposalTransformer() = 0;
virtual IGraphTransformer& GetPhysicalFinalizingTransformer() = 0;
diff --git a/ydb/library/yql/core/yql_opt_proposed_by_data.cpp b/ydb/library/yql/core/yql_opt_proposed_by_data.cpp
index 3b1e53663d..d9653a2ea2 100644
--- a/ydb/library/yql/core/yql_opt_proposed_by_data.cpp
+++ b/ydb/library/yql/core/yql_opt_proposed_by_data.cpp
@@ -406,4 +406,14 @@ TAutoPtr<IGraphTransformer> CreateRecaptureDataProposalsInspector(const TTypeAnn
);
}
+TAutoPtr<IGraphTransformer> CreateStatisticsProposalsInspector(const TTypeAnnotationContext& types, const TString& provider) {
+ return CreateSpecificDataProposalsInspector<ESource::DataSource>(
+ types,
+ provider,
+ [](IDataProvider* provider) -> IGraphTransformer& {
+ return provider->GetStatisticsProposalTransformer();
+ }
+ );
+}
+
} // namespace NYql
diff --git a/ydb/library/yql/core/yql_opt_proposed_by_data.h b/ydb/library/yql/core/yql_opt_proposed_by_data.h
index 0ec0bcab43..dbc3035adc 100644
--- a/ydb/library/yql/core/yql_opt_proposed_by_data.h
+++ b/ydb/library/yql/core/yql_opt_proposed_by_data.h
@@ -14,6 +14,7 @@ TAutoPtr<IGraphTransformer> CreateConfigureTransformer(const TTypeAnnotationCont
TAutoPtr<IGraphTransformer> CreateIODiscoveryTransformer(const TTypeAnnotationContext& types);
TAutoPtr<IGraphTransformer> CreateEpochsTransformer(const TTypeAnnotationContext& types);
TAutoPtr<IGraphTransformer> CreateRecaptureDataProposalsInspector(const TTypeAnnotationContext& types, const TString& provider);
+TAutoPtr<IGraphTransformer> CreateStatisticsProposalsInspector(const TTypeAnnotationContext& types, const TString& provider);
TAutoPtr<IGraphTransformer> CreateLogicalDataProposalsInspector(const TTypeAnnotationContext& types);
TAutoPtr<IGraphTransformer> CreatePhysicalDataProposalsInspector(const TTypeAnnotationContext& types);
TAutoPtr<IGraphTransformer> CreatePhysicalFinalizers(const TTypeAnnotationContext& types);
diff --git a/ydb/library/yql/core/yql_type_annotation.cpp b/ydb/library/yql/core/yql_type_annotation.cpp
index 1c63948080..6082a1db95 100644
--- a/ydb/library/yql/core/yql_type_annotation.cpp
+++ b/ydb/library/yql/core/yql_type_annotation.cpp
@@ -53,6 +53,7 @@ void TTypeAnnotationContext::Reset() {
ExpectedTypes.clear();
ExpectedConstraints.clear();
ExpectedColumnOrders.clear();
+ StatisticsMap.clear();
}
TString FormatColumnOrder(const TMaybe<TColumnOrder>& columnOrder) {
diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h
index ef857a4cb5..bffbac61fa 100644
--- a/ydb/library/yql/core/yql_type_annotation.h
+++ b/ydb/library/yql/core/yql_type_annotation.h
@@ -236,6 +236,7 @@ struct TTypeAnnotationContext: public TThrRefBase {
bool UseBlocks = false;
bool PgEmitAggApply = false;
IArrowResolver::TPtr ArrowResolver;
+ TString CostBasedOptimizerType;
// compatibility with v0 or raw s-expression code
bool OrderedColumns = false;
diff --git a/ydb/library/yql/dq/integration/yql_dq_integration.h b/ydb/library/yql/dq/integration/yql_dq_integration.h
index b34880f637..d4f45d7f16 100644
--- a/ydb/library/yql/dq/integration/yql_dq_integration.h
+++ b/ydb/library/yql/dq/integration/yql_dq_integration.h
@@ -2,6 +2,7 @@
#include <ydb/library/yql/ast/yql_expr.h>
#include <ydb/library/yql/core/yql_data_provider.h>
+#include <ydb/library/yql/core/yql_statistics.h>
#include <ydb/library/yql/dq/tasks/dq_tasks_graph.h>
#include <ydb/library/yql/public/issue/yql_issue.h>
@@ -40,10 +41,11 @@ public:
virtual ui64 Partition(const TDqSettings& config, size_t maxPartitions, const TExprNode& node,
TVector<TString>& partitions, TString* clusterName, TExprContext& ctx, bool canFallback) = 0;
- virtual bool CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues = false) { Y_UNUSED(skipIssues); Y_UNUSED(node); Y_UNUSED(ctx); return true; }
+ virtual bool CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues = false) = 0;
virtual bool CanRead(const TExprNode& read, TExprContext& ctx, bool skipIssues = true) = 0;
virtual TMaybe<ui64> EstimateReadSize(ui64 dataSizePerJob, ui32 maxTasksPerStage, const TVector<const TExprNode*>& nodes, TExprContext& ctx) = 0;
virtual TExprNode::TPtr WrapRead(const TDqSettings& config, const TExprNode::TPtr& read, TExprContext& ctx) = 0;
+ virtual TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& readWrap, TExprContext& ctx) = 0;
// Nothing if callable is not for writing,
// false if callable is for writing and there are some errors (they are added to ctx),
diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp
index 451ecf606c..a66cac44f2 100644
--- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp
+++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp
@@ -7,6 +7,13 @@ ui64 TDqIntegrationBase::Partition(const TDqSettings&, size_t, const TExprNode&,
return 0;
}
+bool TDqIntegrationBase::CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues) {
+ Y_UNUSED(skipIssues);
+ Y_UNUSED(node);
+ Y_UNUSED(ctx);
+ return true;
+}
+
bool TDqIntegrationBase::CanRead(const TExprNode&, TExprContext&, bool) {
return false;
}
@@ -19,6 +26,12 @@ TExprNode::TPtr TDqIntegrationBase::WrapRead(const TDqSettings&, const TExprNode
return read;
}
+TMaybe<TOptimizerStatistics> TDqIntegrationBase::ReadStatistics(const TExprNode::TPtr& readWrap, TExprContext& ctx) {
+ Y_UNUSED(readWrap);
+ Y_UNUSED(ctx);
+ return Nothing();
+}
+
TMaybe<bool> TDqIntegrationBase::CanWrite(const TExprNode&, TExprContext&) {
return Nothing();
}
diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h
index 714aa124dc..7273d5a25f 100644
--- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h
+++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h
@@ -8,9 +8,11 @@ class TDqIntegrationBase: public IDqIntegration {
public:
ui64 Partition(const TDqSettings& config, size_t maxPartitions, const TExprNode& node,
TVector<TString>& partitions, TString* clusterName, TExprContext& ctx, bool canFallback) override;
+ bool CheckPragmas(const TExprNode& node, TExprContext& ctx, bool skipIssues) override;
bool CanRead(const TExprNode& read, TExprContext& ctx, bool skipIssues) override;
TMaybe<ui64> EstimateReadSize(ui64 dataSizePerJob, ui32 maxTasksPerStage, const TVector<const TExprNode*>& nodes, TExprContext& ctx) override;
TExprNode::TPtr WrapRead(const TDqSettings& config, const TExprNode::TPtr& read, TExprContext& ctx) override;
+ TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& readWrap, TExprContext& ctx) override;
void RegisterMkqlCompiler(NCommon::TMkqlCallableCompilerBase& compiler) override;
TMaybe<bool> CanWrite(const TExprNode& write, TExprContext& ctx) override;
TExprNode::TPtr WrapWrite(const TExprNode::TPtr& write, TExprContext& ctx) override;
diff --git a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp
index c04c6e4d87..f4e8a8ce1b 100644
--- a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp
+++ b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.cpp
@@ -165,6 +165,7 @@ void TDataProviderBase::Reset() {
}
}
GetRecaptureOptProposalTransformer().Rewind();
+ GetStatisticsProposalTransformer().Rewind();
GetLogicalOptProposalTransformer().Rewind();
GetPhysicalOptProposalTransformer().Rewind();
GetPhysicalFinalizingTransformer().Rewind();
@@ -179,6 +180,10 @@ IGraphTransformer& TDataProviderBase::GetRecaptureOptProposalTransformer() {
return NullTransformer_;
}
+IGraphTransformer& TDataProviderBase::GetStatisticsProposalTransformer() {
+ return NullTransformer_;
+}
+
IGraphTransformer& TDataProviderBase::GetLogicalOptProposalTransformer() {
return NullTransformer_;
}
diff --git a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h
index d1bfd4b027..445ffa1c18 100644
--- a/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h
+++ b/ydb/library/yql/providers/common/provider/yql_data_provider_impl.h
@@ -58,6 +58,7 @@ public:
void PostRewriteIO() override;
void Reset() override;
IGraphTransformer& GetRecaptureOptProposalTransformer() override;
+ IGraphTransformer& GetStatisticsProposalTransformer() override;
IGraphTransformer& GetLogicalOptProposalTransformer() override;
IGraphTransformer& GetPhysicalOptProposalTransformer() override;
IGraphTransformer& GetPhysicalFinalizingTransformer() override;
diff --git a/ydb/library/yql/providers/config/yql_config_provider.cpp b/ydb/library/yql/providers/config/yql_config_provider.cpp
index 794fa85580..1f93ad2307 100644
--- a/ydb/library/yql/providers/config/yql_config_provider.cpp
+++ b/ydb/library/yql/providers/config/yql_config_provider.cpp
@@ -810,6 +810,24 @@ namespace {
Types.PgEmitAggApply = (name == "PgEmitAggApply");
}
+ else if (name == "CostBasedOptimizer") {
+ if (args.size() != 1) {
+ ctx.AddError(TIssue(pos, TStringBuilder() << "Expected at most 1 argument, but got " << args.size()));
+ return false;
+ }
+ auto arg = TString{args[0]};
+
+ if (arg == "PG") {
+ Types.CostBasedOptimizerType = arg;
+ } else if (arg == "DPccp") {
+ Types.CostBasedOptimizerType = arg;
+ } else if (arg == "disable") {
+ ;
+ } else {
+ ctx.AddError(TIssue(pos, TStringBuilder() << "Expected `disable|PG|DPccp', but got: " << args[0]));
+ return false;
+ }
+ }
else {
ctx.AddError(TIssue(pos, TStringBuilder() << "Unsupported command: " << name));
return false;
diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt
index e192c1673e..899857894e 100644
--- a/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/providers/dq/provider/CMakeLists.darwin-x86_64.txt
@@ -56,6 +56,7 @@ target_sources(providers-dq-provider PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp
)
diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt
index 86469196fc..3d2343c2a7 100644
--- a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-aarch64.txt
@@ -57,6 +57,7 @@ target_sources(providers-dq-provider PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp
)
diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt
index 86469196fc..3d2343c2a7 100644
--- a/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/providers/dq/provider/CMakeLists.linux-x86_64.txt
@@ -57,6 +57,7 @@ target_sources(providers-dq-provider PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp
)
diff --git a/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt b/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt
index e192c1673e..899857894e 100644
--- a/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/providers/dq/provider/CMakeLists.windows-x86_64.txt
@@ -56,6 +56,7 @@ target_sources(providers-dq-provider PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasink.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_recapture.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_statistics_json.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/providers/dq/provider/yql_dq_validate.cpp
)
diff --git a/ydb/library/yql/providers/dq/provider/ya.make b/ydb/library/yql/providers/dq/provider/ya.make
index 67b4edb3e5..5765c684d4 100644
--- a/ydb/library/yql/providers/dq/provider/ya.make
+++ b/ydb/library/yql/providers/dq/provider/ya.make
@@ -19,6 +19,8 @@ SRCS(
yql_dq_datasource.h
yql_dq_recapture.cpp
yql_dq_recapture.h
+ yql_dq_statistics.cpp
+ yql_dq_statistics.h
yql_dq_statistics_json.cpp
yql_dq_statistics_json.h
yql_dq_validate.cpp
diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
index 49c97fbf83..7a4cb98e69 100644
--- a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
+++ b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp
@@ -3,6 +3,7 @@
#include "yql_dq_datasource_type_ann.h"
#include "yql_dq_state.h"
#include "yql_dq_validate.h"
+#include "yql_dq_statistics.h"
#include <ydb/library/yql/providers/common/config/yql_configuration_transformer.h>
#include <ydb/library/yql/providers/common/provider/yql_data_provider_impl.h>
@@ -48,6 +49,7 @@ public:
, ExecTransformer_([this, execTransformerFactory] () { return THolder<IGraphTransformer>(execTransformerFactory(State_)); })
, TypeAnnotationTransformer_([] () { return CreateDqsDataSourceTypeAnnotationTransformer(); })
, ConstraintsTransformer_([] () { return CreateDqDataSourceConstraintTransformer(); })
+ , StatisticsTransformer_([this]() { return CreateDqsStatisticsTransformer(State_); })
{ }
TStringBuf GetName() const override {
@@ -68,6 +70,10 @@ public:
return *ConfigurationTransformer_;
}
+ IGraphTransformer& GetStatisticsProposalTransformer() override {
+ return *StatisticsTransformer_;
+ }
+
bool CanBuildResultImpl(const TExprNode& node, TNodeSet& visited) {
if (!visited.emplace(&node).second) {
return true;
@@ -268,6 +274,7 @@ private:
TLazyInitHolder<IGraphTransformer> ExecTransformer_;
TLazyInitHolder<TVisitorTransformerBase> TypeAnnotationTransformer_;
TLazyInitHolder<IGraphTransformer> ConstraintsTransformer_;
+ TLazyInitHolder<IGraphTransformer> StatisticsTransformer_;
};
}
diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp
new file mode 100644
index 0000000000..e2aa1f4ba6
--- /dev/null
+++ b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp
@@ -0,0 +1,68 @@
+#include "yql_dq_statistics.h"
+#include "yql_dq_state.h"
+
+#include <ydb/library/yql/dq/opt/dq_opt_stat.h>
+#include <ydb/library/yql/dq/integration/yql_dq_integration.h>
+#include <ydb/library/yql/core/yql_expr_optimize.h>
+
+#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h>
+
+namespace NYql {
+
+using namespace NNodes;
+
+class TDqsStatisticsTransformer : public TSyncTransformerBase {
+public:
+ TDqsStatisticsTransformer(const TDqStatePtr& state)
+ : State(state)
+ { }
+
+ IGraphTransformer::TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final {
+ output = input;
+
+ if (!State->TypeCtx->CostBasedOptimizerType) {
+ return IGraphTransformer::TStatus::Ok;
+ }
+
+ TOptimizeExprSettings settings(nullptr);
+
+ auto ret = OptimizeExpr(input, output, [*this](const TExprNode::TPtr& input, TExprContext& ctx) {
+ Y_UNUSED(ctx);
+ auto output = input;
+
+ if (TCoFlatMap::Match(input.Get())){
+ NDq::InferStatisticsForFlatMap(input, State->TypeCtx);
+ } else if(TCoSkipNullMembers::Match(input.Get())){
+ NDq::InferStatisticsForSkipNullMembers(input, State->TypeCtx);
+ } else if (TDqReadWrapBase::Match(input.Get())) {
+ auto read = input->Child(TDqReadWrapBase::idx_Input);
+ auto dataSourceChildIndex = 1;
+ YQL_ENSURE(read->ChildrenSize() > 1);
+ YQL_ENSURE(read->Child(dataSourceChildIndex)->IsCallable("DataSource"));
+ auto dataSourceName = read->Child(dataSourceChildIndex)->Child(0)->Content();
+ auto datasource = State->TypeCtx->DataSourceMap.FindPtr(dataSourceName);
+ YQL_ENSURE(datasource);
+ if (auto dqIntegration = (*datasource)->GetDqIntegration()) {
+ auto stat = dqIntegration->ReadStatistics(read, ctx);
+ if (stat) {
+ State->TypeCtx->SetStats(input.Get(), std::move(std::make_shared<TOptimizerStatistics>(*stat)));
+ }
+ }
+ }
+ return output;
+ }, ctx, settings);
+
+ return ret;
+ }
+
+ void Rewind() { }
+
+private:
+ TDqStatePtr State;
+};
+
+THolder<IGraphTransformer> CreateDqsStatisticsTransformer(TDqStatePtr state) {
+ return MakeHolder<TDqsStatisticsTransformer>(state);
+}
+
+} // namespace NYql
diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.h b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.h
new file mode 100644
index 0000000000..460b363bf4
--- /dev/null
+++ b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+
+#include <util/generic/ptr.h>
+
+namespace NYql {
+
+struct TDqState;
+using TDqStatePtr = TIntrusivePtr<TDqState>;
+
+THolder<IGraphTransformer> CreateDqsStatisticsTransformer(TDqStatePtr state);
+
+} // namespace NYql
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp
index 7ca72f87cd..896200ced6 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp
@@ -344,6 +344,29 @@ public:
return false;
}
+ TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& read, TExprContext& ctx) override {
+ Y_UNUSED(ctx);
+ TOptimizerStatistics stat(0, 0);
+ if (auto maybeRead = TMaybeNode<TYtReadTable>(read)) {
+ auto input = maybeRead.Cast().Input();
+ for (auto section: input) {
+ for (const auto& path: section.Paths()) {
+ auto pathInfo = MakeIntrusive<TYtPathInfo>(path);
+ auto tableInfo = pathInfo->Table;
+ YQL_ENSURE(tableInfo);
+
+ if (tableInfo->Stat) {
+ stat.Nrows += tableInfo->Stat->RecordsCount;
+ }
+ if (pathInfo->Columns && pathInfo->Columns->GetColumns()) {
+ stat.Ncols += pathInfo->Columns->GetColumns()->size();
+ }
+ }
+ }
+ }
+ return stat;
+ }
+
TMaybe<ui64> EstimateReadSize(ui64 dataSizePerJob, ui32 maxTasksPerStage, const TVector<const TExprNode*>& nodes, TExprContext& ctx) override {
TVector<bool> hasErasurePerNode;
hasErasurePerNode.reserve(nodes.size());
diff --git a/ydb/library/yql/sql/v1/context.h b/ydb/library/yql/sql/v1/context.h
index 980d7e262d..8896c1cb1a 100644
--- a/ydb/library/yql/sql/v1/context.h
+++ b/ydb/library/yql/sql/v1/context.h
@@ -256,6 +256,7 @@ namespace NSQLTranslationV1 {
bool EnableSystemColumns = true;
bool DqEngineEnable = false;
bool DqEngineForce = false;
+ TString CostBasedOptimizer;
TMaybe<bool> JsonQueryReturnsJsonDocument;
TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections;
TMaybe<bool> AnsiRankForNullableKeys = true;
diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp
index 08c4ce616a..809154d8e7 100644
--- a/ydb/library/yql/sql/v1/query.cpp
+++ b/ydb/library/yql/sql/v1/query.cpp
@@ -2479,6 +2479,11 @@ public:
BuildQuotedAtom(Pos, "DqEngine"), BuildQuotedAtom(Pos, mode))));
}
+ if (ctx.CostBasedOptimizer) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "CostBasedOptimizer"), BuildQuotedAtom(Pos, ctx.CostBasedOptimizer))));
+ }
+
if (ctx.JsonQueryReturnsJsonDocument.Defined()) {
TString pragmaName = "DisableJsonQueryReturnsJsonDocument";
if (*ctx.JsonQueryReturnsJsonDocument) {
diff --git a/ydb/library/yql/sql/v1/sql_query.cpp b/ydb/library/yql/sql/v1/sql_query.cpp
index b8ced318ce..d0d78ea430 100644
--- a/ydb/library/yql/sql/v1/sql_query.cpp
+++ b/ydb/library/yql/sql/v1/sql_query.cpp
@@ -1974,6 +1974,16 @@ TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success
} else if (normalizedPragma == "disablecompactgroupby") {
Ctx.CompactGroupBy = false;
Ctx.IncrementMonCounter("sql_pragma", "DisableCompactGroupBy");
+ } else if (normalizedPragma == "costbasedoptimizer") {
+ Ctx.IncrementMonCounter("sql_pragma", "CostBasedOptimizer");
+ if (values.size() != 1 || !values[0].GetLiteral()
+ || ! (*values[0].GetLiteral() == "disable" || *values[0].GetLiteral() == "PG" || *values[0].GetLiteral() == "DPccp"))
+ {
+ Error() << "Expected `disable|PG|DPccp' argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.CostBasedOptimizer = *values[0].GetLiteral();
} else {
Error() << "Unknown pragma: " << pragma;
Ctx.IncrementMonCounter("sql_errors", "UnknownPragma");