diff options
| author | lucius <[email protected]> | 2025-07-22 11:54:54 +0300 |
|---|---|---|
| committer | lucius <[email protected]> | 2025-07-22 12:08:10 +0300 |
| commit | 483d11bf5a8df9991fc6c614908d8f5a750dcca8 (patch) | |
| tree | 991c1e91cd32f494d9399268fccb1bda501f12ad | |
| parent | fd48d57a5e946aff8a6ff640107fe4d0645e8a8e (diff) | |
YQL-20197: fix FilterOverAggregate when predicate uses all fields
Оказалось, что пушдаун предиката через Aggregate не работал, когда в предикате используются все поля из Aggregate. Чиню.
commit_hash:c2e8cab88a0adb496464b9dac52807fcc95f433d
5 files changed, 111 insertions, 10 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp index 0c7a83ec9f5..5be47cf0975 100644 --- a/yql/essentials/core/common_opt/yql_co_flow2.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp @@ -1660,6 +1660,43 @@ ICalcualtor::TPtr BuildProgram(const TExprNode::TPtr& node, const TNodeMap<ESubg return result; } +bool CanPushdownOverAggregate( + const TExprNode::TPtr& p, + const TExprNode::TPtr& arg, + const TOptimizeContext& optCtx, + const THashSet<TStringBuf>& keyColumns +) { + if (IsNoPush(*p)) { + return false; + } + + if (HasDependsOn(p, arg)) { + return false; + } + + if (!p->IsComplete() && !IsStrict(p)) { + return false; + } + + // Check used fields to ensure that predicate use only key columns from aggregation. + TSet<TStringBuf> usedFields; + // Predicate with HaveFieldsSubset()==true and any usedFields (including empty) can be used for pushdown (for example constant predicates can have empty usedFields). + if (!HaveFieldsSubset(p, *arg, usedFields, *optCtx.ParentsMap)) { + static const char optName[] = "FilterOverAggregateAllFields"; + const bool canPushdownAll = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types); + if (!canPushdownAll) { + return false; + } + + // Predicate with HaveFieldsSubset()==false and non-empty usedFields also can be used for pushdown (all fields are used). 
+ if (usedFields.empty()) { + return false; + } + } + + return AllOf(usedFields, [&keyColumns] (TStringBuf field) { return keyColumns.contains(field); }); +} + TExprBase FilterOverAggregate(const TCoFlatMapBase& node, TExprContext& ctx, TOptimizeContext& optCtx) { YQL_ENSURE(optCtx.ParentsMap); if (!TCoConditionalValueBase::Match(node.Lambda().Body().Raw())) { @@ -1682,18 +1719,12 @@ TExprBase FilterOverAggregate(const TCoFlatMapBase& node, TExprContext& ctx, TOp TExprNodeList pushComponents; TExprNodeList restComponents; size_t separableComponents = 0; - for (auto& p : andComponents) { - TSet<TStringBuf> usedFields; - if (IsNoPush(*p) || - HasDependsOn(p, arg.Ptr()) || - !HaveFieldsSubset(p, arg.Ref(), usedFields, *optCtx.ParentsMap) || - !AllOf(usedFields, [&](TStringBuf field) { return keyColumns.contains(field); }) || - !p->IsComplete() && !IsStrict(p)) - { - restComponents.push_back(p); - } else { + for (const auto& p : andComponents) { + if (CanPushdownOverAggregate(p, arg.Ptr(), optCtx, keyColumns)) { pushComponents.push_back(p); ++separableComponents; + } else { + restComponents.push_back(p); } } diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json index 404d7a82754..c2222bfd7e5 100644 --- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json @@ -181,6 +181,20 @@ "uri": "https://{canondata_backend}/1599023/38ec9754770d90a028f92de4e0d6fc9f0a72c9ef/resource.tar.gz#test.test_aggregate-listbuiltin_constness-default.txt-Results_/results.txt" } ], + "test.test[aggregate-yql-20197-default.txt-Debug]": [ + { + "checksum": "eb0f0e300d22ca671394b50f2fa343d3", + "size": 607, + "uri": "https://{canondata_backend}/1917492/13e0784e60be917c2c6c6d40e5017618fa1d2c5e/resource.tar.gz#test.test_aggregate-yql-20197-default.txt-Debug_/opt.yql" + } + ], + "test.test[aggregate-yql-20197-default.txt-Results]": [ + { + 
"checksum": "86d710093fdd7e4c5d5a50ef473ee047", + "size": 1080, + "uri": "https://{canondata_backend}/1871002/a43a4af8f04899d43a39e11d0162607cc4a4897b/resource.tar.gz#test.test_aggregate-yql-20197-default.txt-Results_/results.txt" + } + ], "test.test[bigdate-common_type-default.txt-Debug]": [ { "checksum": "7d8249b6484a480738e702db45a5130c", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index b4351ac41d7..2a2b2d5737e 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -1042,6 +1042,13 @@ "uri": "https://{canondata_backend}/1925821/6494c0b47eb6d65247ddb7962a165f6a764c86f7/resource.tar.gz#test_sql2yql.test_aggregate-yql-20171_/sql.yql" } ], + "test_sql2yql.test[aggregate-yql-20197]": [ + { + "checksum": "bdc823fd53979146ce174fd73fc73cf2", + "size": 2241, + "uri": "https://{canondata_backend}/1942278/31c3c0763ef1cab14897810b757db9b69954e27f/resource.tar.gz#test_sql2yql.test_aggregate-yql-20197_/sql.yql" + } + ], "test_sql2yql.test[ansi_idents-escaping]": [ { "checksum": "4870ad0bb397aa5a3edad1f634eb6e93", @@ -8598,6 +8605,11 @@ "uri": "file://test_sql_format.test_aggregate-yql-20171_/formatted.sql" } ], + "test_sql_format.test[aggregate-yql-20197]": [ + { + "uri": "file://test_sql_format.test_aggregate-yql-20197_/formatted.sql" + } + ], "test_sql_format.test[ansi_idents-escaping]": [ { "uri": "file://test_sql_format.test_ansi_idents-escaping_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql new file mode 100644 index 00000000000..0ce76676370 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-yql-20197_/formatted.sql @@ -0,0 +1,22 @@ +PRAGMA config.flags('OptimizerFlags', 
'FilterOverAggregateAllFields'); + +SELECT + * +FROM ( + SELECT + m1, + m2, + FROM + AS_TABLE([ + <|m1: 0, m2: 0|>, + <|m1: 0, m2: 1|>, + <|m1: 0, m2: 2|>, + <|m1: 1, m2: 1|>, + ]) + GROUP BY + m1, + m2 +) +WHERE + (m1 == 1 OR m2 == 1) +; diff --git a/yql/essentials/tests/sql/suites/aggregate/yql-20197.sql b/yql/essentials/tests/sql/suites/aggregate/yql-20197.sql new file mode 100644 index 00000000000..d921d568702 --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/yql-20197.sql @@ -0,0 +1,22 @@ +PRAGMA config.flags("OptimizerFlags", "FilterOverAggregateAllFields"); + +SELECT + * +FROM ( + SELECT + m1, + m2, + FROM + AS_TABLE([ + <|m1: 0, m2: 0|>, + <|m1: 0, m2: 1|>, + <|m1: 0, m2: 2|>, + <|m1: 1, m2: 1|>, + ]) + GROUP BY + m1, + m2 +) +WHERE + (m1 == 1 OR m2 == 1) +; |
