summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlucius <[email protected]>2025-07-22 11:54:54 +0300
committerlucius <[email protected]>2025-07-22 12:08:10 +0300
commit483d11bf5a8df9991fc6c614908d8f5a750dcca8 (patch)
tree991c1e91cd32f494d9399268fccb1bda501f12ad
parentfd48d57a5e946aff8a6ff640107fe4d0645e8a8e (diff)
YQL-20197: fix FilterOverAggregate when predicate uses all fields
Оказалось, что пушдаун предиката через Aggregate не работал, когда в предикате используются все поля из Aggregate. Чиню. commit_hash:c2e8cab88a0adb496464b9dac52807fcc95f433d
-rw-r--r--yql/essentials/core/common_opt/yql_co_flow2.cpp51
-rw-r--r--yql/essentials/tests/sql/minirun/part1/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json12
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql22
-rw-r--r--yql/essentials/tests/sql/suites/aggregate/yql-20197.sql22
5 files changed, 111 insertions, 10 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp
index 0c7a83ec9f5..5be47cf0975 100644
--- a/yql/essentials/core/common_opt/yql_co_flow2.cpp
+++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp
@@ -1660,6 +1660,43 @@ ICalcualtor::TPtr BuildProgram(const TExprNode::TPtr& node, const TNodeMap<ESubg
return result;
}
+bool CanPushdownOverAggregate( // Decides whether predicate component `p` may be pushed down over the aggregate.
+ const TExprNode::TPtr& p, // predicate component being tested
+ const TExprNode::TPtr& arg, // node handed to HasDependsOn/HaveFieldsSubset as the one `p` may reference
+ const TOptimizeContext& optCtx, // provides ParentsMap and Types (the latter is queried for optimizer flags)
+ const THashSet<TStringBuf>& keyColumns // aggregation key columns; every field used by `p` must be among them
+) {
+ if (IsNoPush(*p)) {
+ return false;
+ }
+
+ if (HasDependsOn(p, arg)) {
+ return false;
+ }
+
+ if (!p->IsComplete() && !IsStrict(p)) { // an incomplete component qualifies only when it is strict
+ return false;
+ }
+
+ // Collect the fields used by the predicate so we can verify that it uses only key columns of the aggregation.
+ TSet<TStringBuf> usedFields;
+ // When HaveFieldsSubset() returns true, any usedFields (including an empty set, e.g. for a constant predicate) is acceptable for pushdown.
+ if (!HaveFieldsSubset(p, *arg, usedFields, *optCtx.ParentsMap)) {
+ static const char optName[] = "FilterOverAggregateAllFields";
+ const bool canPushdownAll = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types); // opt-in: the flag must be enabled and not disabled
+ if (!canPushdownAll) {
+ return false;
+ }
+
+ // When HaveFieldsSubset() returns false but usedFields is non-empty, the predicate uses all fields and can still be pushed down; an empty set here means no pushdown.
+ if (usedFields.empty()) {
+ return false;
+ }
+ }
+
+ return AllOf(usedFields, [&keyColumns] (TStringBuf field) { return keyColumns.contains(field); }); // final condition: every used field is a key column
+}
+
TExprBase FilterOverAggregate(const TCoFlatMapBase& node, TExprContext& ctx, TOptimizeContext& optCtx) {
YQL_ENSURE(optCtx.ParentsMap);
if (!TCoConditionalValueBase::Match(node.Lambda().Body().Raw())) {
@@ -1682,18 +1719,12 @@ TExprBase FilterOverAggregate(const TCoFlatMapBase& node, TExprContext& ctx, TOp
TExprNodeList pushComponents;
TExprNodeList restComponents;
size_t separableComponents = 0;
- for (auto& p : andComponents) {
- TSet<TStringBuf> usedFields;
- if (IsNoPush(*p) ||
- HasDependsOn(p, arg.Ptr()) ||
- !HaveFieldsSubset(p, arg.Ref(), usedFields, *optCtx.ParentsMap) ||
- !AllOf(usedFields, [&](TStringBuf field) { return keyColumns.contains(field); }) ||
- !p->IsComplete() && !IsStrict(p))
- {
- restComponents.push_back(p);
- } else {
+ for (const auto& p : andComponents) {
+ if (CanPushdownOverAggregate(p, arg.Ptr(), optCtx, keyColumns)) {
pushComponents.push_back(p);
++separableComponents;
+ } else {
+ restComponents.push_back(p);
}
}
diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json
index 404d7a82754..c2222bfd7e5 100644
--- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json
@@ -181,6 +181,20 @@
"uri": "https://{canondata_backend}/1599023/38ec9754770d90a028f92de4e0d6fc9f0a72c9ef/resource.tar.gz#test.test_aggregate-listbuiltin_constness-default.txt-Results_/results.txt"
}
],
+ "test.test[aggregate-yql-20197-default.txt-Debug]": [
+ {
+ "checksum": "eb0f0e300d22ca671394b50f2fa343d3",
+ "size": 607,
+ "uri": "https://{canondata_backend}/1917492/13e0784e60be917c2c6c6d40e5017618fa1d2c5e/resource.tar.gz#test.test_aggregate-yql-20197-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[aggregate-yql-20197-default.txt-Results]": [
+ {
+ "checksum": "86d710093fdd7e4c5d5a50ef473ee047",
+ "size": 1080,
+ "uri": "https://{canondata_backend}/1871002/a43a4af8f04899d43a39e11d0162607cc4a4897b/resource.tar.gz#test.test_aggregate-yql-20197-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[bigdate-common_type-default.txt-Debug]": [
{
"checksum": "7d8249b6484a480738e702db45a5130c",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index b4351ac41d7..2a2b2d5737e 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -1042,6 +1042,13 @@
"uri": "https://{canondata_backend}/1925821/6494c0b47eb6d65247ddb7962a165f6a764c86f7/resource.tar.gz#test_sql2yql.test_aggregate-yql-20171_/sql.yql"
}
],
+ "test_sql2yql.test[aggregate-yql-20197]": [
+ {
+ "checksum": "bdc823fd53979146ce174fd73fc73cf2",
+ "size": 2241,
+ "uri": "https://{canondata_backend}/1942278/31c3c0763ef1cab14897810b757db9b69954e27f/resource.tar.gz#test_sql2yql.test_aggregate-yql-20197_/sql.yql"
+ }
+ ],
"test_sql2yql.test[ansi_idents-escaping]": [
{
"checksum": "4870ad0bb397aa5a3edad1f634eb6e93",
@@ -8598,6 +8605,11 @@
"uri": "file://test_sql_format.test_aggregate-yql-20171_/formatted.sql"
}
],
+ "test_sql_format.test[aggregate-yql-20197]": [
+ {
+ "uri": "file://test_sql_format.test_aggregate-yql-20197_/formatted.sql"
+ }
+ ],
"test_sql_format.test[ansi_idents-escaping]": [
{
"uri": "file://test_sql_format.test_ansi_idents-escaping_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql
new file mode 100644
index 00000000000..0ce76676370
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql
@@ -0,0 +1,22 @@
+PRAGMA config.flags('OptimizerFlags', 'FilterOverAggregateAllFields');
+
+SELECT
+ *
+FROM (
+ SELECT
+ m1,
+ m2,
+ FROM
+ AS_TABLE([
+ <|m1: 0, m2: 0|>,
+ <|m1: 0, m2: 1|>,
+ <|m1: 0, m2: 2|>,
+ <|m1: 1, m2: 1|>,
+ ])
+ GROUP BY
+ m1,
+ m2
+)
+WHERE
+ (m1 == 1 OR m2 == 1)
+;
diff --git a/yql/essentials/tests/sql/suites/aggregate/yql-20197.sql b/yql/essentials/tests/sql/suites/aggregate/yql-20197.sql
new file mode 100644
index 00000000000..d921d568702
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/aggregate/yql-20197.sql
@@ -0,0 +1,22 @@
+PRAGMA config.flags("OptimizerFlags", "FilterOverAggregateAllFields");
+
+SELECT
+ *
+FROM (
+ SELECT
+ m1,
+ m2,
+ FROM
+ AS_TABLE([
+ <|m1: 0, m2: 0|>,
+ <|m1: 0, m2: 1|>,
+ <|m1: 0, m2: 2|>,
+ <|m1: 1, m2: 1|>,
+ ])
+ GROUP BY
+ m1,
+ m2
+)
+WHERE
+ (m1 == 1 OR m2 == 1)
+;