diff options
author | Pavel Velikhov <pavelvelikhov@ydb.tech> | 2025-04-23 17:20:52 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-23 14:20:52 +0000 |
commit | b38bd0dd54eff96d609088e35af32106df3e5807 (patch) | |
tree | 84d68c59903b2250971fcd17c2329fbd730d3047 | |
parent | 6f7b91184dfe0d146a630d91747ae549ee690e37 (diff) | |
download | ydb-b38bd0dd54eff96d609088e35af32106df3e5807.tar.gz |
Expanded constant folding with UDFs (#17533)
Co-authored-by: Pavel Velikhov <pavelvelikhov@localhost.localdomain>
11 files changed, 73 insertions, 12 deletions
diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 9d6f79f1550..733fda15c25 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -649,6 +649,7 @@ void ApplyServiceConfig(TKikimrConfiguration& kqpConfig, const TTableServiceConf kqpConfig.DefaultCostBasedOptimizationLevel = serviceConfig.GetDefaultCostBasedOptimizationLevel(); kqpConfig.DefaultEnableShuffleElimination = serviceConfig.GetDefaultEnableShuffleElimination(); kqpConfig.EnableConstantFolding = serviceConfig.GetEnableConstantFolding(); + kqpConfig.EnableFoldUdfs = serviceConfig.GetEnableFoldUdfs(); kqpConfig.SetDefaultEnabledSpillingNodes(serviceConfig.GetEnableSpillingNodes()); kqpConfig.EnableSpilling = serviceConfig.GetEnableQueryServiceSpilling(); kqpConfig.EnableSnapshotIsolationRW = serviceConfig.GetEnableSnapshotIsolationRW(); diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp index 5dd8fe3c1db..7f68d871a8b 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp @@ -309,6 +309,7 @@ private: ui64 defaultCostBasedOptimizationLevel = TableServiceConfig.GetDefaultCostBasedOptimizationLevel(); bool enableConstantFolding = TableServiceConfig.GetEnableConstantFolding(); + bool enableFoldUdfs = TableServiceConfig.GetEnableFoldUdfs(); bool defaultEnableShuffleElimination = TableServiceConfig.GetDefaultEnableShuffleElimination(); @@ -341,6 +342,7 @@ private: TableServiceConfig.GetEnableSpillingNodes() != enableSpillingNodes || TableServiceConfig.GetDefaultCostBasedOptimizationLevel() != defaultCostBasedOptimizationLevel || TableServiceConfig.GetEnableConstantFolding() != enableConstantFolding || + TableServiceConfig.GetEnableFoldUdfs() != enableFoldUdfs || TableServiceConfig.GetEnableAstCache() != enableAstCache || TableServiceConfig.GetEnableImplicitQueryParameterTypes() != enableImplicitQueryParameterTypes || TableServiceConfig.GetEnablePgConstsToParams() != enablePgConstsToParams || diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp index 8e597d9b56d..2d6e254c666 100644 --- a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp @@ -14,7 +14,7 @@ namespace { * Traverse a lambda and create a mapping from nodes to nodes wrapped in EvaluateExpr callable * We check for literals specifically, since they shouldn't be evaluated */ - void ExtractConstantExprs(const TExprNode::TPtr& input, TNodeOnNodeOwnedMap& replaces, TExprContext& ctx) { + void ExtractConstantExprs(const TExprNode::TPtr& input, TNodeOnNodeOwnedMap& replaces, TExprContext& ctx, bool foldUdfs = true) { if (TCoLambda::Match(input.Get())) { auto lambda = TExprBase(input).Cast<TCoLambda>(); return ExtractConstantExprs(lambda.Body().Ptr(), replaces, ctx); @@ -24,7 +24,7 @@ namespace { return; } - if (IsConstantExpr(input) && !input->IsCallable("PgConst")) { + if (IsConstantExpr(input, foldUdfs) && !input->IsCallable("PgConst")) { TNodeOnNodeOwnedMap deepClones; auto inputClone = ctx.DeepCopy(*input, ctx, deepClones, false, true, true); @@ -64,6 +64,8 @@ IGraphTransformer::TStatus TKqpConstantFoldingTransformer::DoTransform(TExprNode return IGraphTransformer::TStatus::Ok; } + bool foldUdfs = Config->EnableFoldUdfs; + TNodeOnNodeOwnedMap replaces; VisitExpr(input, [&](const TExprNode::TPtr& node) { @@ -78,7 +80,7 @@ IGraphTransformer::TStatus TKqpConstantFoldingTransformer::DoTransform(TExprNode return true; } - ExtractConstantExprs(flatmap.Lambda().Body().Ptr(), replaces, ctx); + ExtractConstantExprs(flatmap.Lambda().Body().Ptr(), replaces, ctx, foldUdfs); return replaces.empty(); } diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h index 4e3483282f7..e88d6f3143d 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -177,6 +177,7 @@ struct TKikimrConfiguration : public TKikimrSettings, public NCommon::TSettingDi bool EnableSpilling = true; ui32 DefaultCostBasedOptimizationLevel = 4; bool EnableConstantFolding = true; + bool EnableFoldUdfs = true; ui64 DefaultEnableSpillingNodes = 0; bool EnableAntlr4Parser = false; bool EnableSnapshotIsolationRW = false; diff --git a/ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql b/ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql new file mode 100644 index 00000000000..7755475e878 --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql @@ -0,0 +1,3 @@ +SELECT * +FROM `/Root/S` as S +WHERE S.payload2 = String::HexDecode("54"); diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index 307e0b4dcfd..d33a6c23789 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -683,6 +683,10 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { ExecuteJoinOrderTestGenericQueryWithStats("queries/datetime_constant_fold.sql", "stats/basic.json", false, ColumnStore); } + Y_UNIT_TEST_TWIN(UdfConstantFold, ColumnStore) { + ExecuteJoinOrderTestGenericQueryWithStats("queries/udf_constant_fold.sql", "stats/basic.json", false, ColumnStore); + } + Y_UNIT_TEST_TWIN(TPCHRandomJoinViewJustWorks, ColumnStore) { ExecuteJoinOrderTestGenericQueryWithStats("queries/tpch_random_join_view_just_works.sql", "stats/tpch1000s.json", false, ColumnStore); } diff --git a/ydb/core/protos/table_service_config.proto b/ydb/core/protos/table_service_config.proto index 6d1582e5d64..ab5613c1b7a 100644 --- a/ydb/core/protos/table_service_config.proto +++ b/ydb/core/protos/table_service_config.proto @@ -381,4 +381,7 @@ message TTableServiceConfig { } optional TBatchOperationSettings BatchOperationSettings = 81; + + optional bool EnableFoldUdfs = 82 [ default = true ]; + }; diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 57ff8a108e6..f72fd7d817f 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -15,13 +15,50 @@ namespace { * We maintain a white list of callables that we consider part of constant expressions * All other callables will not be evaluated */ - THashSet<TString> constantFoldingWhiteList = { + THashSet<TString> ConstantFoldingWhiteList = { "Concat", "Just", "Optional", "SafeCast", "AsList", "+", "-", "*", "/", "%"}; - THashSet<TString> pgConstantFoldingWhiteList = { + THashSet<TString> PgConstantFoldingWhiteList = { "PgResolvedOp", "PgResolvedCall", "PgCast", "PgConst", "PgArray", "PgType"}; + TVector<TString> UdfBlackList = { + "RandomNumber", + "Random", + "RandomUuid", + "Now", + "CurrentUtcDate", + "CurrentUtcDatetime", + "CurrentUtcTimestamp" + }; + + bool IsConstantUdf(const TExprNode::TPtr& input, bool withParams = false) { + if (!TCoApply::Match(input.Get())) { + return false; + } + + if (input->ChildrenSize()!=2) { + return false; + } + if (input->Child(0)->IsCallable("Udf")) { + auto udf = TCoUdf(input->Child(0)); + auto udfName = udf.MethodName().StringValue(); + + for (auto blck : UdfBlackList) { + if (udfName.find(blck) != TString::npos) { + return false; + } + } + + if (withParams) { + return IsConstantExprWithParams(input->Child(1)); + } + else { + return IsConstantExpr(input->Child(1)); + } + } + return false; + } TString RemoveAliases(TString attributeName) { if (auto idx = attributeName.find('.'); idx != TString::npos) { @@ -167,7 +204,7 @@ bool IsConstantExprPg(const TExprNode::TPtr& input) { return true; } - if (input->IsCallable(pgConstantFoldingWhiteList) || input->IsList()) { + if (input->IsCallable(PgConstantFoldingWhiteList) || input->IsList()) { for (size_t i = 0; i < input->ChildrenSize(); i++) { auto callableInput = input->Child(i); if (callableInput->IsLambda() && !IsConstantExprPg(callableInput->Child(1))) { @@ -190,7 +227,7 @@ bool IsConstantExprPg(const TExprNode::TPtr& input) { * - If its a callable in the while list and all children are constant expressions, then its a constant expression * - If one of the child is a type expression, it also passes the check */ -bool IsConstantExpr(const TExprNode::TPtr& input) { +bool IsConstantExpr(const TExprNode::TPtr& input, bool foldUdfs) { if (input->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) { return IsConstantExprPg(input); } @@ -203,7 +240,7 @@ bool IsConstantExpr(const TExprNode::TPtr& input) { return true; } - else if (input->IsCallable(constantFoldingWhiteList)) { + else if (input->IsCallable(ConstantFoldingWhiteList)) { for (size_t i = 0; i < input->ChildrenSize(); i++) { auto callableInput = input->Child(i); if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) { @@ -213,6 +250,10 @@ bool IsConstantExpr(const TExprNode::TPtr& input) { return true; } + else if (foldUdfs && TCoApply::Match(input.Get()) && IsConstantUdf(input)) { + return true; + } + return false; } @@ -233,7 +274,7 @@ bool IsConstantExprWithParams(const TExprNode::TPtr& input) { return true; } - else if (input->IsCallable(constantFoldingWhiteList)) { + else if (input->IsCallable(ConstantFoldingWhiteList)) { for (size_t i = 0; i < input->ChildrenSize(); i++) { auto callableInput = input->Child(i); if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExprWithParams(callableInput)) { @@ -243,6 +284,10 @@ bool IsConstantExprWithParams(const TExprNode::TPtr& input) { return true; } + else if (TCoApply::Match(input.Get()) && IsConstantUdf(input, true)) { + return true; + } + return false; } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h index d8ded84ae05..9a4695807db 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.h +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -99,7 +99,7 @@ private: }; bool NeedCalc(NNodes::TExprBase node); -bool IsConstantExpr(const TExprNode::TPtr& input); +bool IsConstantExpr(const TExprNode::TPtr& input, bool foldUdfs = true); bool IsConstantExprWithParams(const TExprNode::TPtr& input); } // namespace NYql::NDq { diff --git a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan index a56404b2a56..1adb13d0866 100644 --- a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan +++ b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan @@ -16,7 +16,7 @@ "limit": "1000", "lookup_by": [ "hashOrderDate (9983033094796217818)", - "orderDate (\u00ab2023-03-01\u00bb)" + "orderDate (2023-03-01)" ], "scan_by": [ "orderId (-\u221e, +\u221e)" diff --git a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan index 08df07dc2a2..4b03ddfed0e 100644 --- a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan +++ b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan @@ -14,7 +14,7 @@ ], "limit": "1001", "scan_by": [ - "HashPassword (\u00ab\u00bb, Non-UTF8 string)" + "HashPassword (null, Non-UTF8 string)" ], "type": "Scan" } |