diff options
author | pavelvelikhov <pavelvelikhov@ydb.tech> | 2023-12-06 19:08:11 +0300 |
---|---|---|
committer | pavelvelikhov <pavelvelikhov@ydb.tech> | 2023-12-06 20:56:43 +0300 |
commit | 81745ec41b2b6f3d67eff789248c78e9325d59b9 (patch) | |
tree | 725c2d69bf80f4f219370ac58afedcfe612571c4 | |
parent | c962b9ba7511d2fe0776750d9a65d8f8d331acb2 (diff) | |
download | ydb-81745ec41b2b6f3d67eff789248c78e9325d59b9.tar.gz |
Added constant folding to KQP
Added constant folding to KQP
-rw-r--r-- | ydb/core/kqp/host/kqp_runner.cpp | 3 | ||||
-rw-r--r-- | ydb/core/kqp/opt/CMakeLists.darwin-arm64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/opt/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/opt/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/opt/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/opt/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp | 188 | ||||
-rw-r--r-- | ydb/core/kqp/opt/kqp_constant_folding_transformer.h | 45 | ||||
-rw-r--r-- | ydb/core/kqp/opt/ya.make | 1 | ||||
-rw-r--r-- | ydb/core/kqp/provider/yql_kikimr_settings.cpp | 6 | ||||
-rw-r--r-- | ydb/core/kqp/provider/yql_kikimr_settings.h | 4 | ||||
-rw-r--r-- | ydb/core/kqp/ut/join/kqp_join_order_ut.cpp | 80 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.cpp | 2 |
13 files changed, 332 insertions, 2 deletions
diff --git a/ydb/core/kqp/host/kqp_runner.cpp b/ydb/core/kqp/host/kqp_runner.cpp index dc4f4bf099..2925bd58a3 100644 --- a/ydb/core/kqp/host/kqp_runner.cpp +++ b/ydb/core/kqp/host/kqp_runner.cpp @@ -5,6 +5,8 @@ #include <ydb/core/kqp/opt/kqp_opt.h> #include <ydb/core/kqp/opt/logical/kqp_opt_log.h> #include <ydb/core/kqp/opt/kqp_statistics_transformer.h> +#include <ydb/core/kqp/opt/kqp_constant_folding_transformer.h> + #include <ydb/core/kqp/opt/physical/kqp_opt_phy.h> #include <ydb/core/kqp/opt/peephole/kqp_opt_peephole.h> @@ -256,6 +258,7 @@ private: .Add(CreateKqpCheckQueryTransformer(), "CheckKqlQuery") .AddPostTypeAnnotation(/* forSubgraph */ true) .AddCommonOptimization() + .Add(CreateKqpConstantFoldingTransformer(OptimizeCtx, *typesCtx, Config), "ConstantFolding") .Add(CreateKqpStatisticsTransformer(OptimizeCtx, *typesCtx, Config), "Statistics") .Add(CreateKqpLogOptTransformer(OptimizeCtx, *typesCtx, Config), "LogicalOptimize") .Add(CreateLogicalDataProposalsInspector(*typesCtx), "ProvidersLogicalOptimize") diff --git a/ydb/core/kqp/opt/CMakeLists.darwin-arm64.txt b/ydb/core/kqp/opt/CMakeLists.darwin-arm64.txt index 461862256a..90a38078a4 100644 --- a/ydb/core/kqp/opt/CMakeLists.darwin-arm64.txt +++ b/ydb/core/kqp/opt/CMakeLists.darwin-arm64.txt @@ -47,6 +47,7 @@ target_sources(core-kqp-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_blocks_transformer.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_statistics_transformer.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp ) generate_enum_serilization(core-kqp-opt ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.h diff --git a/ydb/core/kqp/opt/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/opt/CMakeLists.darwin-x86_64.txt index 461862256a..90a38078a4 100644 --- a/ydb/core/kqp/opt/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/kqp/opt/CMakeLists.darwin-x86_64.txt @@ -47,6 +47,7 @@ target_sources(core-kqp-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_blocks_transformer.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_statistics_transformer.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp ) generate_enum_serilization(core-kqp-opt ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.h diff --git a/ydb/core/kqp/opt/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/opt/CMakeLists.linux-aarch64.txt index a52a9fa681..22edef967d 100644 --- a/ydb/core/kqp/opt/CMakeLists.linux-aarch64.txt +++ b/ydb/core/kqp/opt/CMakeLists.linux-aarch64.txt @@ -48,6 +48,7 @@ target_sources(core-kqp-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_blocks_transformer.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_statistics_transformer.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp ) generate_enum_serilization(core-kqp-opt ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.h diff --git a/ydb/core/kqp/opt/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/opt/CMakeLists.linux-x86_64.txt index a52a9fa681..22edef967d 100644 --- a/ydb/core/kqp/opt/CMakeLists.linux-x86_64.txt +++ b/ydb/core/kqp/opt/CMakeLists.linux-x86_64.txt @@ -48,6 +48,7 @@ target_sources(core-kqp-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_blocks_transformer.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_statistics_transformer.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp ) generate_enum_serilization(core-kqp-opt ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.h diff --git a/ydb/core/kqp/opt/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/opt/CMakeLists.windows-x86_64.txt index 461862256a..90a38078a4 100644 --- a/ydb/core/kqp/opt/CMakeLists.windows-x86_64.txt +++ b/ydb/core/kqp/opt/CMakeLists.windows-x86_64.txt @@ -47,6 +47,7 @@ target_sources(core-kqp-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_blocks_transformer.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_statistics_transformer.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp ) generate_enum_serilization(core-kqp-opt ${CMAKE_SOURCE_DIR}/ydb/core/kqp/opt/kqp_query_plan.h diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp new file mode 100644 index 0000000000..73de91941e --- /dev/null +++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp @@ -0,0 +1,188 @@ +#include "kqp_constant_folding_transformer.h" + +#include <ydb/library/yql/utils/log/log.h> +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +using namespace NYql; +using namespace NYql::NNodes; +using namespace NKikimr::NKqp; +using namespace NYql::NDq; + +namespace { + + /*** + * We maintain a white list of callables that we consider part of constant expressions + * All other callables will not be evaluated + */ + THashSet<TString> constantFoldingWhiteList = { + "Concat", "Just", "Optional","SafeCast", + "+", "-", "*", "/", "%"}; + + bool NeedCalc(NNodes::TExprBase node) { + auto type = node.Ref().GetTypeAnn(); + if (type->IsSingleton()) { + return false; + } + + if (type->GetKind() == ETypeAnnotationKind::Optional) { + if (node.Maybe<TCoNothing>()) { + return false; + } + if (auto maybeJust = node.Maybe<TCoJust>()) { + return NeedCalc(maybeJust.Cast().Input()); + } + return true; + } + + if (type->GetKind() == ETypeAnnotationKind::Tuple) { + if (auto maybeTuple = node.Maybe<TExprList>()) { + return AnyOf(maybeTuple.Cast(), [](const auto& item) { return NeedCalc(item); }); + } + return true; + } + + if (type->GetKind() == ETypeAnnotationKind::List) { + if (node.Maybe<TCoList>()) { + YQL_ENSURE(node.Ref().ChildrenSize() == 1, "Should be rewritten to AsList"); + return false; + } + if (auto maybeAsList = node.Maybe<TCoAsList>()) { + return AnyOf(maybeAsList.Cast().Args(), [](const auto& item) { return NeedCalc(NNodes::TExprBase(item)); }); + } + return true; + } + + YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Data, + "Object of type " << *type << " should not be considered for calculation"); + + return !node.Maybe<TCoDataCtor>(); + } + + /*** + * Check if the expression is a constant expression + * Its type annotation need to specify that its a data type, and then we check: + * - If its a literal, its a constant expression + * - If its a callable in the while list and all children are constant expressions, then its a constant expression + * - If one of the child is a type expression, it also passes the check + */ + bool IsConstantExpr(const TExprNode::TPtr& input) { + if (!IsDataOrOptionalOfData(input->GetTypeAnn())) { + return false; + } + + if (!NeedCalc(TExprBase(input))) { + return true; + } + + else if (input->IsCallable(constantFoldingWhiteList)) { + for (size_t i = 0; i < input->ChildrenSize(); i++) { + auto callableInput = input->Child(i); + if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) { + return false; + } + } + return true; + } + + return false; + } + + /** + * Traverse a lambda and create a mapping from nodes to nodes wrapped in EvaluateExpr callable + * We check for literals specifically, since they shouldn't be evaluated + */ + void ExtractConstantExprs(const TExprNode::TPtr& input, TNodeOnNodeOwnedMap& replaces, TExprContext& ctx) { + if (TCoLambda::Match(input.Get())) { + auto lambda = TExprBase(input).Cast<TCoLambda>(); + return ExtractConstantExprs(lambda.Body().Ptr(), replaces, ctx); + } + + if (IsDataOrOptionalOfData(input->GetTypeAnn()) && !NeedCalc(TExprBase(input))) { + return; + } + + if (IsConstantExpr(input)) { + TNodeOnNodeOwnedMap deepClones; + auto inputClone = ctx.DeepCopy(*input, ctx, deepClones, false, true, true); + + auto replaceExpr = ctx.Builder(input->Pos()) + .Callable("EvaluateExpr") + .Add(0, inputClone) + .Seal() + .Build(); + + replaces[input.Get()] = replaceExpr; + + return; + } + + if (input->IsCallable() && input->Content() != "EvaluateExpr") { + if (input->ChildrenSize() >= 1) { + for (size_t i = 0; i < input->ChildrenSize(); i++) { + ExtractConstantExprs(input->Child(i), replaces, ctx); + } + } + } + + return; + } + +} + +/** + * Constant folding transformer finds constant expressions in FlatMaps, evaluates them and + * substitutes the result in the AST + */ +IGraphTransformer::TStatus TKqpConstantFoldingTransformer::DoTransform(TExprNode::TPtr input, + TExprNode::TPtr& output, TExprContext& ctx) { + output = input; + + if (!Config->HasOptEnableConstantFolding()) { + return IGraphTransformer::TStatus::Ok; + } + + TNodeOnNodeOwnedMap replaces; + + VisitExpr(input, [&](const TExprNode::TPtr& node) { + if (!replaces.empty()) { + return false; + } + + if (TCoFlatMap::Match(node.Get())) { + auto flatmap = TExprBase(node).Cast<TCoFlatMap>(); + + if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { + return true; + } + + ExtractConstantExprs(flatmap.Lambda().Body().Ptr(), replaces, ctx); + + return replaces.empty(); + } + + return true; + }); + + if (replaces.empty()) { + return IGraphTransformer::TStatus::Ok; + ; + } else { + TOptimizeExprSettings settings(&TypeCtx); + settings.VisitTuples = false; + ctx.Step.Repeat(TExprStep::ExprEval); + + auto status = RemapExpr(input, output, replaces, ctx, settings); + + return status.Combine(IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true)); + } + + return IGraphTransformer::TStatus::Ok; +} + +void TKqpConstantFoldingTransformer::Rewind() { +} + +TAutoPtr<IGraphTransformer> NKikimr::NKqp::CreateKqpConstantFoldingTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, + TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config) { + return THolder<IGraphTransformer>(new TKqpConstantFoldingTransformer(kqpCtx, typeCtx, config)); +} diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.h b/ydb/core/kqp/opt/kqp_constant_folding_transformer.h new file mode 100644 index 0000000000..061d2bd87f --- /dev/null +++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.h @@ -0,0 +1,45 @@ +#pragma once + +#include "kqp_opt.h" + +#include <ydb/core/kqp/common/kqp_yql.h> +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_graph_transformer.h> +#include <ydb/library/yql/core/yql_expr_optimize.h> +#include <ydb/library/yql/core/yql_expr_type_annotation.h> +#include <ydb/library/yql/core/yql_opt_utils.h> + + +namespace NKikimr { +namespace NKqp { + +using namespace NYql; +using namespace NYql::NNodes; +using namespace NOpt; + +/** + * Constant folding transformer finds constant expressions in FlatMaps, evaluates them and + * substitutes the result in the AST +*/ +class TKqpConstantFoldingTransformer : public TSyncTransformerBase { + public: + TKqpConstantFoldingTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typeCtx, + const TKikimrConfiguration::TPtr& config) : + Config(config), + TypeCtx(typeCtx), + KqpCtx(*kqpCtx) {} + + // Main method of the transformer + IGraphTransformer::TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final; + void Rewind() override; + + private: + const TKikimrConfiguration::TPtr& Config; + TTypeAnnotationContext& TypeCtx; + const TKqpOptimizeContext& KqpCtx; +}; + +TAutoPtr<IGraphTransformer> CreateKqpConstantFoldingTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config); + +} +}
\ No newline at end of file diff --git a/ydb/core/kqp/opt/ya.make b/ydb/core/kqp/opt/ya.make index e2adfaef5b..816583d41e 100644 --- a/ydb/core/kqp/opt/ya.make +++ b/ydb/core/kqp/opt/ya.make @@ -13,6 +13,7 @@ SRCS( kqp_query_blocks_transformer.cpp kqp_query_plan.cpp kqp_statistics_transformer.cpp + kqp_constant_folding_transformer.cpp ) PEERDIR( diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.cpp b/ydb/core/kqp/provider/yql_kikimr_settings.cpp index 8f4a9229c0..7789279ffa 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_settings.cpp @@ -70,6 +70,8 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, OptUseFinalizeByKey); REGISTER_SETTING(*this, OptEnableCostBasedOptimization); + REGISTER_SETTING(*this, OptEnableConstantFolding); + REGISTER_SETTING(*this, MaxDPccpDPTableSize); REGISTER_SETTING(*this, MaxTasksPerStage); @@ -144,6 +146,10 @@ bool TKikimrSettings::HasOptEnableCostBasedOptimization() const { return GetOptionalFlagValue(OptEnableCostBasedOptimization.Get()) == EOptionalFlag::Enabled; } +bool TKikimrSettings::HasOptEnableConstantFolding() const { + return GetOptionalFlagValue(OptEnableConstantFolding.Get()) == EOptionalFlag::Enabled; +} + EOptionalFlag TKikimrSettings::GetOptPredicateExtract() const { return GetOptionalFlagValue(OptEnablePredicateExtract.Get()); diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h index c773ad299b..9ca375f3d5 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -63,6 +63,8 @@ struct TKikimrSettings { NCommon::TConfSetting<bool, false> OptEnableOlapProvideComputeSharding; NCommon::TConfSetting<bool, false> OptUseFinalizeByKey; NCommon::TConfSetting<bool, false> OptEnableCostBasedOptimization; + NCommon::TConfSetting<bool, false> OptEnableConstantFolding; + NCommon::TConfSetting<ui32, false> MaxDPccpDPTableSize; @@ -88,6 +90,8 @@ struct TKikimrSettings { bool HasOptEnableOlapProvideComputeSharding() const; bool HasOptUseFinalizeByKey() const; bool HasOptEnableCostBasedOptimization() const; + bool HasOptEnableConstantFolding() const; + EOptionalFlag GetOptPredicateExtract() const; EOptionalFlag GetUseLlvm() const; diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index 597767627a..2aad2e7654 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -83,6 +83,10 @@ static TKikimrRunner GetKikimrWithJoinSettings(){ setting.SetValue("true"); settings.push_back(setting); + setting.SetName("OptEnableConstantFolding"); + setting.SetValue("true"); + settings.push_back(setting); + return TKikimrRunner(settings); } @@ -351,6 +355,82 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { Cout << result.GetPlan(); } } + + Y_UNIT_TEST(FiveWayJoinWithConstantFold) { + + auto kikimr = GetKikimrWithJoinSettings(); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + CreateSampleTable(session); + + /* join with parameters */ + { + const TString query = Q_(R"( + SELECT * + FROM `/Root/R` as R + INNER JOIN + `/Root/S` as S + ON R.id = S.id + INNER JOIN + `/Root/T` as T + ON S.id = T.id + INNER JOIN + `/Root/U` as U + ON T.id = U.id + INNER JOIN + `/Root/V` as V + ON U.id = V.id + WHERE R.payload1 = 'bl' || 'ah' AND V.payload5 = 'blah' + )"); + + auto result = session.ExplainDataQuery(query).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + + NJson::TJsonValue plan; + NJson::ReadJsonTree(result.GetPlan(), &plan, true); + Cout << result.GetPlan(); + } + } + + Y_UNIT_TEST(FiveWayJoinWithConstantFoldOpt) { + + auto kikimr = GetKikimrWithJoinSettings(); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + CreateSampleTable(session); + + /* join with parameters */ + { + const TString query = Q_(R"( + SELECT * + FROM `/Root/R` as R + INNER JOIN + `/Root/S` as S + ON R.id = S.id + INNER JOIN + `/Root/T` as T + ON S.id = T.id + INNER JOIN + `/Root/U` as U + ON T.id = U.id + INNER JOIN + `/Root/V` as V + ON U.id = V.id + WHERE R.payload1 = 'bl' || Cast(1 as String?) AND V.payload5 = 'blah' + )"); + + auto result = session.ExplainDataQuery(query).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + + NJson::TJsonValue plan; + NJson::ReadJsonTree(result.GetPlan(), &plan, true); + Cout << result.GetPlan(); + } + } } } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index db45e4f3e3..99516169b0 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -28,8 +28,6 @@ void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationCont return; } - YQL_CLOG(TRACE, CoreDq) << "Right side of the map join: " << rightArg.Raw()->Dump(); - TVector<TString> leftJoinKeys; TVector<TString> rightJoinKeys; |