aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Velikhov <pavelvelikhov@ydb.tech>2025-04-23 17:20:52 +0300
committerGitHub <noreply@github.com>2025-04-23 14:20:52 +0000
commitb38bd0dd54eff96d609088e35af32106df3e5807 (patch)
tree84d68c59903b2250971fcd17c2329fbd730d3047
parent6f7b91184dfe0d146a630d91747ae549ee690e37 (diff)
downloadydb-b38bd0dd54eff96d609088e35af32106df3e5807.tar.gz
Expanded constant folding with UDFs (#17533)
Co-authored-by: Pavel Velikhov <pavelvelikhov@localhost.localdomain>
-rw-r--r--ydb/core/kqp/compile_service/kqp_compile_actor.cpp1
-rw-r--r--ydb/core/kqp/compile_service/kqp_compile_service.cpp2
-rw-r--r--ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp8
-rw-r--r--ydb/core/kqp/provider/yql_kikimr_settings.h1
-rw-r--r--ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql3
-rw-r--r--ydb/core/kqp/ut/join/kqp_join_order_ut.cpp4
-rw-r--r--ydb/core/protos/table_service_config.proto3
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.cpp57
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.h2
-rw-r--r--ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan2
-rw-r--r--ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan2
11 files changed, 73 insertions, 12 deletions
diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp
index 9d6f79f1550..733fda15c25 100644
--- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp
+++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp
@@ -649,6 +649,7 @@ void ApplyServiceConfig(TKikimrConfiguration& kqpConfig, const TTableServiceConf
kqpConfig.DefaultCostBasedOptimizationLevel = serviceConfig.GetDefaultCostBasedOptimizationLevel();
kqpConfig.DefaultEnableShuffleElimination = serviceConfig.GetDefaultEnableShuffleElimination();
kqpConfig.EnableConstantFolding = serviceConfig.GetEnableConstantFolding();
+ kqpConfig.EnableFoldUdfs = serviceConfig.GetEnableFoldUdfs();
kqpConfig.SetDefaultEnabledSpillingNodes(serviceConfig.GetEnableSpillingNodes());
kqpConfig.EnableSpilling = serviceConfig.GetEnableQueryServiceSpilling();
kqpConfig.EnableSnapshotIsolationRW = serviceConfig.GetEnableSnapshotIsolationRW();
diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp
index 5dd8fe3c1db..7f68d871a8b 100644
--- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp
+++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp
@@ -309,6 +309,7 @@ private:
ui64 defaultCostBasedOptimizationLevel = TableServiceConfig.GetDefaultCostBasedOptimizationLevel();
bool enableConstantFolding = TableServiceConfig.GetEnableConstantFolding();
+ bool enableFoldUdfs = TableServiceConfig.GetEnableFoldUdfs();
bool defaultEnableShuffleElimination = TableServiceConfig.GetDefaultEnableShuffleElimination();
@@ -341,6 +342,7 @@ private:
TableServiceConfig.GetEnableSpillingNodes() != enableSpillingNodes ||
TableServiceConfig.GetDefaultCostBasedOptimizationLevel() != defaultCostBasedOptimizationLevel ||
TableServiceConfig.GetEnableConstantFolding() != enableConstantFolding ||
+ TableServiceConfig.GetEnableFoldUdfs() != enableFoldUdfs ||
TableServiceConfig.GetEnableAstCache() != enableAstCache ||
TableServiceConfig.GetEnableImplicitQueryParameterTypes() != enableImplicitQueryParameterTypes ||
TableServiceConfig.GetEnablePgConstsToParams() != enablePgConstsToParams ||
diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp
index 8e597d9b56d..2d6e254c666 100644
--- a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp
@@ -14,7 +14,7 @@ namespace {
* Traverse a lambda and create a mapping from nodes to nodes wrapped in EvaluateExpr callable
* We check for literals specifically, since they shouldn't be evaluated
*/
- void ExtractConstantExprs(const TExprNode::TPtr& input, TNodeOnNodeOwnedMap& replaces, TExprContext& ctx) {
+ void ExtractConstantExprs(const TExprNode::TPtr& input, TNodeOnNodeOwnedMap& replaces, TExprContext& ctx, bool foldUdfs = true) {
if (TCoLambda::Match(input.Get())) {
auto lambda = TExprBase(input).Cast<TCoLambda>();
return ExtractConstantExprs(lambda.Body().Ptr(), replaces, ctx); // NOTE(review): foldUdfs is not forwarded here, so a lambda body is processed with the default (true) even if the caller passed false - confirm this is intended
@@ -24,7 +24,7 @@ namespace {
return;
}
- if (IsConstantExpr(input) && !input->IsCallable("PgConst")) {
+ if (IsConstantExpr(input, foldUdfs) && !input->IsCallable("PgConst")) {
TNodeOnNodeOwnedMap deepClones;
auto inputClone = ctx.DeepCopy(*input, ctx, deepClones, false, true, true);
@@ -64,6 +64,8 @@ IGraphTransformer::TStatus TKqpConstantFoldingTransformer::DoTransform(TExprNode
return IGraphTransformer::TStatus::Ok;
}
+ bool foldUdfs = Config->EnableFoldUdfs;
+
TNodeOnNodeOwnedMap replaces;
VisitExpr(input, [&](const TExprNode::TPtr& node) {
@@ -78,7 +80,7 @@ IGraphTransformer::TStatus TKqpConstantFoldingTransformer::DoTransform(TExprNode
return true;
}
- ExtractConstantExprs(flatmap.Lambda().Body().Ptr(), replaces, ctx);
+ ExtractConstantExprs(flatmap.Lambda().Body().Ptr(), replaces, ctx, foldUdfs);
return replaces.empty();
}
diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h
index 4e3483282f7..e88d6f3143d 100644
--- a/ydb/core/kqp/provider/yql_kikimr_settings.h
+++ b/ydb/core/kqp/provider/yql_kikimr_settings.h
@@ -177,6 +177,7 @@ struct TKikimrConfiguration : public TKikimrSettings, public NCommon::TSettingDi
bool EnableSpilling = true;
ui32 DefaultCostBasedOptimizationLevel = 4;
bool EnableConstantFolding = true;
+ bool EnableFoldUdfs = true;
ui64 DefaultEnableSpillingNodes = 0;
bool EnableAntlr4Parser = false;
bool EnableSnapshotIsolationRW = false;
diff --git a/ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql b/ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql
new file mode 100644
index 00000000000..7755475e878
--- /dev/null
+++ b/ydb/core/kqp/ut/join/data/queries/udf_constant_fold.sql
@@ -0,0 +1,3 @@
+SELECT *
+FROM `/Root/S` as S
+WHERE S.payload2 = String::HexDecode("54");
diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
index 307e0b4dcfd..d33a6c23789 100644
--- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
+++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
@@ -683,6 +683,10 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
ExecuteJoinOrderTestGenericQueryWithStats("queries/datetime_constant_fold.sql", "stats/basic.json", false, ColumnStore);
}
+ Y_UNIT_TEST_TWIN(UdfConstantFold, ColumnStore) {
+ ExecuteJoinOrderTestGenericQueryWithStats("queries/udf_constant_fold.sql", "stats/basic.json", false, ColumnStore);
+ }
+
Y_UNIT_TEST_TWIN(TPCHRandomJoinViewJustWorks, ColumnStore) {
ExecuteJoinOrderTestGenericQueryWithStats("queries/tpch_random_join_view_just_works.sql", "stats/tpch1000s.json", false, ColumnStore);
}
diff --git a/ydb/core/protos/table_service_config.proto b/ydb/core/protos/table_service_config.proto
index 6d1582e5d64..ab5613c1b7a 100644
--- a/ydb/core/protos/table_service_config.proto
+++ b/ydb/core/protos/table_service_config.proto
@@ -381,4 +381,7 @@ message TTableServiceConfig {
}
optional TBatchOperationSettings BatchOperationSettings = 81;
+
+ optional bool EnableFoldUdfs = 82 [ default = true ];
+
};
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 57ff8a108e6..f72fd7d817f 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -15,13 +15,50 @@ namespace {
* We maintain a white list of callables that we consider part of constant expressions
* All other callables will not be evaluated
*/
- THashSet<TString> constantFoldingWhiteList = {
+ THashSet<TString> ConstantFoldingWhiteList = {
"Concat", "Just", "Optional", "SafeCast", "AsList",
"+", "-", "*", "/", "%"};
- THashSet<TString> pgConstantFoldingWhiteList = {
+ THashSet<TString> PgConstantFoldingWhiteList = {
"PgResolvedOp", "PgResolvedCall", "PgCast", "PgConst", "PgArray", "PgType"};
+ TVector<TString> UdfBlackList = { // UDF method-name fragments that IsConstantUdf refuses to treat as constant (compared as substrings of the method name)
+ "RandomNumber",
+ "Random",
+ "RandomUuid",
+ "Now",
+ "CurrentUtcDate",
+ "CurrentUtcDatetime",
+ "CurrentUtcTimestamp"
+ };
+
+ bool IsConstantUdf(const TExprNode::TPtr& input, bool withParams = false) { // True when input is an Apply of a Udf callable to a single argument, the UDF method name hits no UdfBlackList entry, and that argument is itself constant
+ if (!TCoApply::Match(input.Get())) { // only Apply nodes are considered
+ return false;
+ }
+
+ if (input->ChildrenSize()!=2) { // exactly two children are accepted: child 0 (the callee) and child 1 (its only argument)
+ return false;
+ }
+ if (input->Child(0)->IsCallable("Udf")) {
+ auto udf = TCoUdf(input->Child(0));
+ auto udfName = udf.MethodName().StringValue();
+
+ for (auto blck : UdfBlackList) {
+ if (udfName.find(blck) != TString::npos) { // substring match: any method name that contains a black-listed fragment is rejected
+ return false;
+ }
+ }
+
+ if (withParams) { // withParams selects which constant-ness predicate is applied to the argument
+ return IsConstantExprWithParams(input->Child(1));
+ }
+ else {
+ return IsConstantExpr(input->Child(1));
+ }
+ }
+ return false; // child 0 is not a Udf callable
+ }
TString RemoveAliases(TString attributeName) {
if (auto idx = attributeName.find('.'); idx != TString::npos) {
@@ -167,7 +204,7 @@ bool IsConstantExprPg(const TExprNode::TPtr& input) {
return true;
}
- if (input->IsCallable(pgConstantFoldingWhiteList) || input->IsList()) {
+ if (input->IsCallable(PgConstantFoldingWhiteList) || input->IsList()) {
for (size_t i = 0; i < input->ChildrenSize(); i++) {
auto callableInput = input->Child(i);
if (callableInput->IsLambda() && !IsConstantExprPg(callableInput->Child(1))) {
@@ -190,7 +227,7 @@ bool IsConstantExprPg(const TExprNode::TPtr& input) {
* - If it's a callable in the white list and all children are constant expressions, then it's a constant expression
* - If one of the children is a type expression, it also passes the check
*/
-bool IsConstantExpr(const TExprNode::TPtr& input) {
+bool IsConstantExpr(const TExprNode::TPtr& input, bool foldUdfs) {
if (input->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) {
return IsConstantExprPg(input);
}
@@ -203,7 +240,7 @@ bool IsConstantExpr(const TExprNode::TPtr& input) {
return true;
}
- else if (input->IsCallable(constantFoldingWhiteList)) {
+ else if (input->IsCallable(ConstantFoldingWhiteList)) {
for (size_t i = 0; i < input->ChildrenSize(); i++) {
auto callableInput = input->Child(i);
if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) { // NOTE(review): the recursive call omits foldUdfs, so children are checked with the declared default (true) even when this call received false - confirm this is intended
@@ -213,6 +250,10 @@ bool IsConstantExpr(const TExprNode::TPtr& input) {
return true;
}
+ else if (foldUdfs && TCoApply::Match(input.Get()) && IsConstantUdf(input)) {
+ return true;
+ }
+
return false;
}
@@ -233,7 +274,7 @@ bool IsConstantExprWithParams(const TExprNode::TPtr& input) {
return true;
}
- else if (input->IsCallable(constantFoldingWhiteList)) {
+ else if (input->IsCallable(ConstantFoldingWhiteList)) {
for (size_t i = 0; i < input->ChildrenSize(); i++) {
auto callableInput = input->Child(i);
if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExprWithParams(callableInput)) {
@@ -243,6 +284,10 @@ bool IsConstantExprWithParams(const TExprNode::TPtr& input) {
return true;
}
+ else if (TCoApply::Match(input.Get()) && IsConstantUdf(input, true)) {
+ return true;
+ }
+
return false;
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h
index d8ded84ae05..9a4695807db 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.h
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.h
@@ -99,7 +99,7 @@ private:
};
bool NeedCalc(NNodes::TExprBase node);
-bool IsConstantExpr(const TExprNode::TPtr& input);
+bool IsConstantExpr(const TExprNode::TPtr& input, bool foldUdfs = true);
bool IsConstantExprWithParams(const TExprNode::TPtr& input);
} // namespace NYql::NDq {
diff --git a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan
index a56404b2a56..1adb13d0866 100644
--- a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan
+++ b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_index_topsort_index_with_selector_aliases_2.sql-plan_/index_topsort_index_with_selector_aliases_2.sql.plan
@@ -16,7 +16,7 @@
"limit": "1000",
"lookup_by": [
"hashOrderDate (9983033094796217818)",
- "orderDate (\u00ab2023-03-01\u00bb)"
+ "orderDate (2023-03-01)"
],
"scan_by": [
"orderId (-\u221e, +\u221e)"
diff --git a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan
index 08df07dc2a2..4b03ddfed0e 100644
--- a/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan
+++ b/ydb/tests/functional/canonical/canondata/test_sql.TestCanonicalFolder1.test_case_pk_predicate_pk_predicate_random_chars_ranges.sql-plan_/pk_predicate_pk_predicate_random_chars_ranges.sql.plan
@@ -14,7 +14,7 @@
],
"limit": "1001",
"scan_by": [
- "HashPassword (\u00ab\u00bb, Non-UTF8 string)"
+ "HashPassword (null, Non-UTF8 string)"
],
"type": "Scan"
}