diff options
| author | ziganshinmr <[email protected]> | 2025-10-03 12:23:31 +0300 |
|---|---|---|
| committer | ziganshinmr <[email protected]> | 2025-10-03 12:57:45 +0300 |
| commit | af2d06d42adfbcdbcc23f84975e43d690630fcba (patch) | |
| tree | fa250b5836da32c52575c9de5e2cffd5cab352d0 | |
| parent | dd3884bdfd83865b67f53df14fcf34e5b2a356f3 (diff) | |
CalcOverWindow fuse fix
commit_hash:3cc0ca5c687c7623fb16e77fb3a9043071a2478b
| -rw-r--r-- | yql/essentials/core/common_opt/yql_co_flow2.cpp | 52 | ||||
| -rw-r--r-- | yql/essentials/core/type_ann/type_ann_list.cpp | 16 | ||||
| -rw-r--r-- | yql/essentials/core/yql_opt_utils.cpp | 16 | ||||
| -rw-r--r-- | yql/essentials/core/yql_opt_utils.h | 1 | ||||
| -rw-r--r-- | yt/yql/tests/sql/suites/window/yql-20456.yql | 16 |
5 files changed, 84 insertions, 17 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp index d743d65c318..d090473eb98 100644 --- a/yql/essentials/core/common_opt/yql_co_flow2.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp @@ -1994,6 +1994,47 @@ TExprNode::TPtr FilterNullMembersToSkipNullMembers(const TCoFlatMapBase& node, T .Build(); } +bool CheckWindowFramesFieldSubset(const TExprNodeList& calcNodes, const TStructExprType& inputItemType, const TTypeAnnotationContext& types) { + static const char optName[] = "CheckWindowFramesFieldSubset"; + if (!IsOptimizerEnabled<optName>(types) || IsOptimizerDisabled<optName>(types)) { + return true; + } + + for (auto calcNode : calcNodes) { + TCoCalcOverWindowTuple calc(calcNode); + for (auto frameNode : calc.Frames().Ref().Children()) { + YQL_ENSURE(TCoWinOnBase::Match(frameNode.Get())); + for (ui32 i = 1; i < frameNode->ChildrenSize(); ++i) { + auto kvTuple = frameNode->ChildPtr(i); + YQL_ENSURE(kvTuple->IsList()); + YQL_ENSURE(2 <= kvTuple->ChildrenSize() && kvTuple->ChildrenSize() <= 3); + + auto traits = kvTuple->ChildPtr(1); + YQL_ENSURE(traits->IsCallable({"Lag", "Lead", "RowNumber", "Rank", "DenseRank", "WindowTraits", "PercentRank", "CumeDist", "NTile"})); + if (traits->IsCallable("WindowTraits")) { + bool isDistinct = kvTuple->ChildrenSize() == 3; + if (!isDistinct) { + YQL_ENSURE(traits->Head().GetTypeAnn()); + const TStructExprType& specItemType = *traits->Head().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>(); + if (!IsFieldSubset(specItemType, inputItemType)) { + return false; + } + } + } else if (traits->IsCallable({"Lag", "Lead", "Rank", "DenseRank", "PercentRank"})) { + YQL_ENSURE(traits->Head().GetTypeAnn()); + const TStructExprType& specItemType = *traits->Head().GetTypeAnn()->Cast<TTypeExprType>()->GetType() + ->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>(); + if (!IsFieldSubset(specItemType, inputItemType)) { + return false; + } + } + } + } + } + + return true; +} + } // namespace void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { @@ -2889,13 +2930,22 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { return node; } + auto input = node->Head().HeadPtr(); + YQL_ENSURE(input->GetTypeAnn()); + const TStructExprType& inputItemType = *input->GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>(); + TExprNodeList parentCalcs = ExtractCalcsOverWindow(node, ctx); + YQL_ENSURE(optCtx.Types); + if (!CheckWindowFramesFieldSubset(parentCalcs, inputItemType, *optCtx.Types)) { + return node; + } + TExprNodeList calcs = ExtractCalcsOverWindow(node->HeadPtr(), ctx); calcs.insert(calcs.end(), parentCalcs.begin(), parentCalcs.end()); YQL_CLOG(DEBUG, Core) << "Fuse nested " << node->Content() << " and " << node->Head().Content(); - return RebuildCalcOverWindowGroup(node->Head().Pos(), node->Head().HeadPtr(), calcs, ctx); + return RebuildCalcOverWindowGroup(node->Head().Pos(), std::move(input), calcs, ctx); }; map[TCoCondense::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { diff --git a/yql/essentials/core/type_ann/type_ann_list.cpp b/yql/essentials/core/type_ann/type_ann_list.cpp index 059c5e58d3c..6dc2fa87fdf 100644 --- a/yql/essentials/core/type_ann/type_ann_list.cpp +++ b/yql/essentials/core/type_ann/type_ann_list.cpp @@ -26,22 +26,6 @@ namespace { return x->GetTypeAnn() && x->GetTypeAnn()->GetKind() == ETypeAnnotationKind::EmptyList; }; - bool IsFieldSubset(const TStructExprType& structType, const TStructExprType& sourceStructType) { - for (auto& item : structType.GetItems()) { - auto name = item->GetName(); - auto type = item->GetItemType(); - if (auto idx = sourceStructType.FindItem(name)) { - if (sourceStructType.GetItems()[*idx]->GetItemType() == type) { - continue; - } - } - - return false; - } - - return true; - } - TExprNode::TPtr RewriteMultiAggregate(const TExprNode& node, TExprContext& ctx) { auto exprLambda = node.Child(1); const TStructExprType* structType = nullptr; diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp index 30c22b4fbcf..d495fc27cd7 100644 --- a/yql/essentials/core/yql_opt_utils.cpp +++ b/yql/essentials/core/yql_opt_utils.cpp @@ -497,6 +497,22 @@ template bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& ar template bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, std::map<std::string_view, TExprNode::TPtr>& usedFields, const TParentsMap& parentsMap, bool allowDependsOn); +bool IsFieldSubset(const TStructExprType& structType, const TStructExprType& sourceStructType) { + for (auto& item : structType.GetItems()) { + auto name = item->GetName(); + auto type = item->GetItemType(); + if (auto idx = sourceStructType.FindItem(name)) { + if (sourceStructType.GetItems()[*idx]->GetItemType() == type) { + continue; + } + } + + return false; + } + + return true; +} + TExprNode::TPtr AddMembersUsedInside(const TExprNode::TPtr& start, const TExprNode& arg, TExprNode::TPtr&& members, const TParentsMap& parentsMap, TExprContext& ctx) { if (!members || !start || &arg == start.Get()) { return {}; diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 392933d8880..72b9afffe31 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -41,6 +41,7 @@ TExprNode::TPtr KeepColumnOrder(const TColumnOrder& order, const TExprNode::TPtr template<class TFieldsSet> bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, TFieldsSet& usedFields, const TParentsMap& parentsMap, bool allowDependsOn = true); +bool IsFieldSubset(const TStructExprType& structType, const TStructExprType& sourceStructType); template<class TFieldsSet> TExprNode::TPtr FilterByFields(TPositionHandle position, const TExprNode::TPtr& input, const TFieldsSet& subsetFields, diff --git a/yt/yql/tests/sql/suites/window/yql-20456.yql b/yt/yql/tests/sql/suites/window/yql-20456.yql new file mode 100644 index 00000000000..b0834ad5777 --- /dev/null +++ b/yt/yql/tests/sql/suites/window/yql-20456.yql @@ -0,0 +1,16 @@ +USE plato; + +PRAGMA warning("disable", "1101"); +PRAGMA config.flags("NormalizeDependsOn"); +PRAGMA config.flags("OptimizerFlags", "CheckWindowFramesFieldSubset"); +PRAGMA DisableDirectRowDependsOn; + +$input = AsList( + AsStruct(1 AS yandexuid, "aaa" AS host, '123' AS timestamp) +); + +select + LEAD(host) over (partition by input.yandexuid order by timestamp) as next_host, + MAX(TableName()) over w as `max_date`, +from AS_TABLE($input) as input +window w as (partition by input.yandexuid); |
