diff options
author | zverevgeny <zverevgeny@ydb.tech> | 2025-03-04 09:19:31 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-04 09:19:31 +0300 |
commit | 3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8 (patch) | |
tree | 3755a1eae363516c01b910f120ef03898fa0c194 | |
parent | 80dee287373544fb2fab855727ee54e1daa50e5a (diff) | |
download | ydb-3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8.tar.gz |
Do not use OlapApply for known functions pushdown (#15055)
6 files changed, 205 insertions, 201 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp index 77e9170c63..5ec99a88ef 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp @@ -24,6 +24,32 @@ static const std::unordered_set<std::string> SecondLevelFilters = { "ends_with" }; +static TMaybeNode<TExprBase> CombinePredicatesWithAnd(const TVector<TExprBase>& conjuncts, TExprContext& ctx, TPositionHandle pos, bool useOlapAnd, bool trueForEmpty) { + if (conjuncts.empty()) { + return trueForEmpty ? TMaybeNode<TExprBase>{MakeBool<true>(pos, ctx)} : TMaybeNode<TExprBase>{}; + } else if (conjuncts.size() == 1) { + return conjuncts[0]; + } else { + if (useOlapAnd) { + return Build<TKqpOlapAnd>(ctx, pos) + .Add(conjuncts) + .Done(); + } else { + return Build<TCoAnd>(ctx, pos) + .Add(conjuncts) + .Done(); + } + } +} + +static TMaybeNode<TExprBase> CombinePredicatesWithAnd(const TVector<TOLAPPredicateNode>& conjuncts, TExprContext& ctx, TPositionHandle pos, bool useOlapAnd, bool trueForEmpty) { + TVector<TExprBase> exprs; + for(const auto& c: conjuncts) { + exprs.emplace_back(c.ExprNode); + } + return CombinePredicatesWithAnd(exprs, ctx, pos, useOlapAnd, trueForEmpty); +} + struct TFilterOpsLevels { TFilterOpsLevels(const TMaybeNode<TExprBase>& firstLevel, const TMaybeNode<TExprBase>& secondLevel) : FirstLevelOps(firstLevel) @@ -69,6 +95,23 @@ struct TFilterOpsLevels { } + static TFilterOpsLevels Merge(TVector<TFilterOpsLevels> predicates, TExprContext& ctx, TPositionHandle pos) { + TVector<TExprBase> predicatesFirstLevel; + TVector<TExprBase> predicatesSecondLevel; + for (const auto& p: predicates) { + if (p.FirstLevelOps.IsValid()) { + predicatesFirstLevel.emplace_back(p.FirstLevelOps.Cast()); + } + if (p.SecondLevelOps.IsValid()) { + predicatesSecondLevel.emplace_back(p.SecondLevelOps.Cast()); + } + } + return { + CombinePredicatesWithAnd(predicatesFirstLevel, ctx, pos, true, false), + CombinePredicatesWithAnd(predicatesSecondLevel, ctx, pos, true, false), + }; + } + TMaybeNode<TExprBase> FirstLevelOps; TMaybeNode<TExprBase> SecondLevelOps; }; @@ -262,7 +305,6 @@ TMaybeNode<TExprBase> SafeCastPredicatePushdown(const TCoFlatMap& inputFlatmap, std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, const TExprNode& argument, TExprContext& ctx, TPositionHandle pos) { - std::vector<TExprBase> out; const auto convertNode = [&ctx, &pos, &argument](const TExprBase& node) -> TMaybeNode<TExprBase> { if (node.Maybe<TCoNull>()) { return node; @@ -368,36 +410,27 @@ std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, const TExp } }; - // Columns & values may be single element - TMaybeNode<TExprBase> node = convertNode(nodeIn); - - if (node.IsValid()) { - out.emplace_back(std::move(node.Cast())); - return out; - } - - // Or columns and values can be Tuple - if (!nodeIn.Maybe<TExprList>()) { - // something unusual found, return empty vector - return out; - } + if (const auto& list = nodeIn.Maybe<TExprList>()) { + const auto& tuple = list.Cast(); + std::vector<TExprBase> out; - auto tuple = nodeIn.Cast<TExprList>(); + out.reserve(tuple.Size()); + for (ui32 i = 0; i < tuple.Size(); ++i) { + TMaybeNode<TExprBase> node = convertNode(tuple.Item(i)); - out.reserve(tuple.Size()); - - for (ui32 i = 0; i < tuple.Size(); ++i) { - TMaybeNode<TExprBase> node = convertNode(tuple.Item(i)); + if (!node.IsValid()) { + // Return empty vector + return TVector<TExprBase>(); + } - if (!node.IsValid()) { - // Return empty vector - return TVector<TExprBase>(); + out.emplace_back(node.Cast()); } - - out.emplace_back(node.Cast()); + return out; + } else if (const auto& node = convertNode(nodeIn); node.IsValid()) { + return {node.Cast()}; + } else { + return {}; } - - return out; } TExprBase BuildOneElementComparison(const std::pair<TExprBase, TExprBase>& parameter, const TCoCompare& predicate, @@ -663,50 +696,22 @@ TFilterOpsLevels PredicatePushdown(const TExprBase& predicate, const TExprNode& return YqlApplyPushdown(predicate, argument, ctx); } -TOLAPPredicateNode WrapPredicates(const std::vector<TOLAPPredicateNode>& predicates, TExprContext& ctx, TPositionHandle pos) { - if (predicates.empty()) { - return {}; - } - - if (const auto predicatesSize = predicates.size(); 1U == predicatesSize) { - return predicates.front(); - } else { - TOLAPPredicateNode result; - result.Children = predicates; - result.CanBePushed = true; - - TVector<NNodes::TExprBase> exprNodes; - exprNodes.reserve(predicatesSize); - for (const auto& pred : predicates) { - exprNodes.emplace_back(pred.ExprNode); - result.CanBePushed &= pred.CanBePushed; - } - result.ExprNode = NNodes::Build<NNodes::TCoAnd>(ctx, pos) - .Add(exprNodes) - .Done().Ptr(); - return result; - } -} - -void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredicateNode& predicatesToPush, TOLAPPredicateNode& remainingPredicates, - TExprContext& ctx, TPositionHandle pos) +std::pair<TVector<TOLAPPredicateNode>, TVector<TOLAPPredicateNode>> SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree) { if (predicateTree.CanBePushed) { - predicatesToPush = predicateTree; - remainingPredicates.ExprNode = MakeBool<true>(pos, ctx); - return; + return {{predicateTree}, {}}; } if (!TCoAnd::Match(predicateTree.ExprNode.Get())) { // We can partially pushdown predicates from AND operator only. // For OR operator we would need to have several read operators which is not acceptable. // TODO: Add support for NOT(op1 OR op2), because it expands to (!op1 AND !op2). - remainingPredicates = predicateTree; - return; + return {{}, {predicateTree}}; } bool isFoundNotStrictOp = false; - std::vector<TOLAPPredicateNode> pushable, remaining; + TVector<TOLAPPredicateNode> pushable; + TVector<TOLAPPredicateNode> remaining; for (const auto& predicate : predicateTree.Children) { if (predicate.CanBePushed && !isFoundNotStrictOp) { pushable.emplace_back(predicate); @@ -717,8 +722,7 @@ void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredi remaining.emplace_back(predicate); } } - predicatesToPush = WrapPredicates(pushable, ctx, pos); - remainingPredicates = WrapPredicates(remaining, ctx, pos); + return {pushable, remaining}; } } // anonymous namespace end @@ -742,6 +746,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz } const auto& lambda = flatmap.Lambda(); + const auto& lambdaArg = lambda.Args().Arg(0).Ref(); YQL_CLOG(TRACE, ProviderKqp) << "Initial OLAP lambda: " << KqpExprToPrettyString(lambda, ctx); @@ -754,55 +759,68 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz auto predicate = optionaIf.Predicate(); auto value = optionaIf.Value(); - if constexpr (NSsa::RuntimeVersion >= 5U) { - TExprNode::TPtr afterPeephole; - bool hasNonDeterministicFunctions; - if (const auto status = PeepHoleOptimizeNode(optionaIf.Ptr(), afterPeephole, ctx, typesCtx, nullptr, hasNonDeterministicFunctions); - status != IGraphTransformer::TStatus::Ok) { - YQL_CLOG(ERROR, ProviderKqp) << "Peephole OLAP failed." << Endl << ctx.IssueManager.GetIssues().ToString(); - return node; - } - - const TCoIf simplified(std::move(afterPeephole)); - predicate = simplified.Predicate(); - value = simplified.ThenValue().Cast<TCoJust>().Input(); - } - TOLAPPredicateNode predicateTree; predicateTree.ExprNode = predicate.Ptr(); - const auto& lambdaArg = lambda.Args().Arg(0).Ref(); - CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body()); + CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body(), false); YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid"); - TOLAPPredicateNode predicatesToPush, remainingPredicates; - SplitForPartialPushdown(predicateTree, predicatesToPush, remainingPredicates, ctx, node.Pos()); - if (!predicatesToPush.IsValid()) { - return node; + auto [pushable, remaining] = SplitForPartialPushdown(predicateTree); + TVector<TFilterOpsLevels> pushedPredicates; + for (const auto& p: pushable) { + pushedPredicates.emplace_back(PredicatePushdown(TExprBase(p.ExprNode), lambdaArg, ctx, node.Pos())); } - YQL_ENSURE(predicatesToPush.IsValid(), "Predicates to push is invalid"); - YQL_ENSURE(remainingPredicates.IsValid(), "Remaining predicates is invalid"); - - const auto pushedFilters = PredicatePushdown(TExprBase(predicatesToPush.ExprNode), lambdaArg, ctx, node.Pos()); - // Temporary fix for https://st.yandex-team.ru/KIKIMR-22560 - // YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!"); - - if (!pushedFilters.IsValid()) { + if constexpr (NSsa::RuntimeVersion >= 5U) { + TVector<TOLAPPredicateNode> remainingAfterApply; + for(const auto& p: remaining) { + const auto recoveredOptinalIfForNonPushedDownPredicates = Build<TCoOptionalIf>(ctx, node.Pos()) + .Predicate(p.ExprNode) + .Value(value) + .Build(); + TExprNode::TPtr afterPeephole; + bool hasNonDeterministicFunctions; + if (const auto status = PeepHoleOptimizeNode(recoveredOptinalIfForNonPushedDownPredicates.Value().Ptr(), afterPeephole, ctx, typesCtx, nullptr, hasNonDeterministicFunctions); + status != IGraphTransformer::TStatus::Ok) { + YQL_CLOG(ERROR, ProviderKqp) << "Peephole OLAP failed." << Endl << ctx.IssueManager.GetIssues().ToString(); + return node; + } + const TCoIf simplified(std::move(afterPeephole)); + predicate = simplified.Predicate(); + value = simplified.ThenValue().Cast<TCoJust>().Input(); + + TOLAPPredicateNode predicateTree; + predicateTree.ExprNode = predicate.Ptr(); + CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body(), true); + YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid"); + auto [pushableWithApply, remaining] = SplitForPartialPushdown(predicateTree); + for (const auto& p: pushableWithApply) { + pushedPredicates.emplace_back(PredicatePushdown(TExprBase(p.ExprNode), lambdaArg, ctx, node.Pos())); + } + remainingAfterApply.insert(remainingAfterApply.end(), remaining.begin(), remaining.end()); + } + remaining = std::move(remainingAfterApply); + } + + if (pushedPredicates.empty()) { return node; } + const auto& pushedFilter = TFilterOpsLevels::Merge(pushedPredicates, ctx, node.Pos()); + + const auto remainingFilter = CombinePredicatesWithAnd(remaining, ctx, node.Pos(), false, true); + TMaybeNode<TExprBase> olapFilter; - if (pushedFilters.FirstLevelOps.IsValid()) { + if (pushedFilter.FirstLevelOps.IsValid()) { olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos()) .Input(read.Process().Body()) - .Condition(pushedFilters.FirstLevelOps.Cast()) + .Condition(pushedFilter.FirstLevelOps.Cast()) .Done(); } - if (pushedFilters.SecondLevelOps.IsValid()) { + if (pushedFilter.SecondLevelOps.IsValid()) { olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos()) .Input(olapFilter.IsValid() ? olapFilter.Cast() : read.Process().Body()) - .Condition(pushedFilters.SecondLevelOps.Cast()) + .Condition(pushedFilter.SecondLevelOps.Cast()) .Done(); } @@ -846,7 +864,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz .Args({"new_arg"}) .Body<TCoOptionalIf>() .Predicate<TExprApplier>() - .Apply(TExprBase(remainingPredicates.ExprNode)) + .Apply(remainingFilter.Cast()) .With(lambda.Args().Arg(0), "new_arg") .Build() .Value<TExprApplier>() diff --git a/ydb/core/kqp/opt/physical/predicate_collector.cpp b/ydb/core/kqp/opt/physical/predicate_collector.cpp index bf17becf1f..5e911f1de1 100644 --- a/ydb/core/kqp/opt/physical/predicate_collector.cpp +++ b/ydb/core/kqp/opt/physical/predicate_collector.cpp @@ -15,7 +15,7 @@ bool IsSupportedPredicate(const TCoCompare& predicate) { return !predicate.Ref().Content().starts_with("Aggr"); } -bool IsSupportedDataType(const TCoDataCtor& node) { +bool IsSupportedDataType(const TCoDataCtor& node, bool allowOlapApply) { if (node.Maybe<TCoBool>() || node.Maybe<TCoFloat>() || node.Maybe<TCoDouble>() || @@ -36,7 +36,7 @@ bool IsSupportedDataType(const TCoDataCtor& node) { return true; } - if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) { + if (allowOlapApply) { if (node.Maybe<TCoDate32>() || node.Maybe<TCoDatetime64>() || node.Maybe<TCoTimestamp64>() || node.Maybe<TCoInterval64>()) { return true; } @@ -86,17 +86,17 @@ bool IsMemberColumn(const TExprBase& node, const TExprNode* lambdaArg) { return false; } -bool IsGoodTypeForArithmeticPushdown(const TTypeAnnotationNode& type) { +bool IsGoodTypeForArithmeticPushdown(const TTypeAnnotationNode& type, bool allowOlapApply) { const auto fatures = NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features; return NUdf::EDataTypeFeatures::NumericType & fatures - || (NKikimr::NSsa::RuntimeVersion >= 5U && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures)); + || (allowOlapApply && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures)); } -bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type) { +bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type, bool allowOlapApply) { const auto fatures = NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features; return (NUdf::EDataTypeFeatures::CanCompare & fatures) && (((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fatures) || - (NKikimr::NSsa::RuntimeVersion >= 5U && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures))); + (allowOlapApply && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures))); } [[maybe_unused]] @@ -139,8 +139,8 @@ bool AbstractTreeCanBePushed(const TExprBase& expr, const TExprNode* ) { return true; } -bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg) { - if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) { +bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg, bool allowOlapApply) { + if (allowOlapApply) { if (node.Maybe<TCoJust>() || node.Maybe<TCoCoalesce>()) { return true; } @@ -156,7 +156,7 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb if (const auto maybeSafeCast = node.Maybe<TCoSafeCast>()) { return IsSupportedCast(maybeSafeCast.Cast()); } else if (const auto maybeData = node.Maybe<TCoDataCtor>()) { - return IsSupportedDataType(maybeData.Cast()); + return IsSupportedDataType(maybeData.Cast(), allowOlapApply); } else if (const auto maybeMember = node.Maybe<TCoMember>()) { return IsMemberColumn(maybeMember.Cast(), lambdaArg); } else if (const auto maybeJsonValue = node.Maybe<TCoJsonValue>()) { @@ -167,20 +167,20 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb } if (const auto op = node.Maybe<TCoUnaryArithmetic>()) { - return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg) && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn()); + return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg, allowOlapApply) && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn(), allowOlapApply); } else if (const auto op = node.Maybe<TCoBinaryArithmetic>()) { - return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg) - && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn()) && !op.Cast().Maybe<TCoAggrAdd>(); + return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg, allowOlapApply) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg, allowOlapApply) + && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn(), allowOlapApply) && !op.Cast().Maybe<TCoAggrAdd>(); } - if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) { + if (allowOlapApply) { return AbstractTreeCanBePushed(node, lambdaArg); } return false; } -bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo) { +bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo, bool allowOlapApply) { const auto& rawOne = RemoveOptionality(typeOne); const auto& rawTwo = RemoveOptionality(typeTwo); if (IsSameAnnotation(rawOne, rawTwo)) @@ -202,21 +202,21 @@ bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTy if (size != itemsTwo.size()) return false; for (auto i = 0U; i < size; ++i) { - if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i])) { + if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i], allowOlapApply)) { return false; } } return true; } case ETypeAnnotationKind::Data: - return IsGoodTypeForComparsionPushdown(typeOne) && IsGoodTypeForComparsionPushdown(typeTwo); + return IsGoodTypeForComparsionPushdown(typeOne, allowOlapApply) && IsGoodTypeForComparsionPushdown(typeTwo, allowOlapApply); default: break; } return false; } -bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input) { +bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input, bool allowOlapApply) { const auto* inputType = input.Ref().GetTypeAnn(); switch (inputType->GetKind()) { case ETypeAnnotationKind::Flow: @@ -239,7 +239,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr return false; } - if (!IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn())) { + if (!IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn(), allowOlapApply)) { return false; } @@ -248,7 +248,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!"); for (size_t i = 0; i < leftList.size(); ++i) { - if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg)) { + if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, allowOlapApply) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, allowOlapApply)) { return false; } } @@ -256,11 +256,11 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr return true; } -bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody) { - return IsSupportedPredicate(compare) && CheckComparisonParametersForPushdown(compare, lambdaArg, lambdaBody); +bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) { + return IsSupportedPredicate(compare) && CheckComparisonParametersForPushdown(compare, lambdaArg, lambdaBody, allowOlapApply); } -bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg) { +bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg, bool allowOlapApply) { /* * There are three ways of comparison in following format: * @@ -281,7 +281,7 @@ bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg) YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!"); for (size_t i = 0; i < leftList.size(); ++i) { - if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg)) { + if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, allowOlapApply) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, allowOlapApply)) { return false; } } @@ -315,16 +315,16 @@ bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprNode* lam return true; } -bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody) { +bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) { if (!coalesce.Value().Maybe<TCoBool>()) { return false; } const auto predicate = coalesce.Predicate(); if (const auto maybeCompare = predicate.Maybe<TCoCompare>()) { - return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody); + return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, allowOlapApply); } else if (const auto maybeFlatmap = predicate.Maybe<TCoFlatMap>()) { - return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg); + return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg, allowOlapApply); } else if (const auto maybeJsonExists = predicate.Maybe<TCoJsonExists>()) { return JsonExistsCanBePushed(maybeJsonExists.Cast(), lambdaArg); } @@ -336,47 +336,42 @@ bool ExistsCanBePushed(const TCoExists& exists, const TExprNode* lambdaArg) { return IsMemberColumn(exists.Optional(), lambdaArg); } -void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) { +void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) { predicateTree.Children.reserve(opNode.ChildrenSize()); predicateTree.CanBePushed = true; for (const auto& childNodePtr: opNode.Children()) { TOLAPPredicateNode child; child.ExprNode = childNodePtr; if (const auto maybeCtor = TMaybeNode<TCoDataCtor>(child.ExprNode)) - child.CanBePushed = IsSupportedDataType(maybeCtor.Cast()); + child.CanBePushed = IsSupportedDataType(maybeCtor.Cast(), allowOlapApply); else - CollectPredicates(TExprBase(child.ExprNode), child, lambdaArg, lambdaBody); + CollectPredicates(TExprBase(child.ExprNode), child, lambdaArg, lambdaBody, allowOlapApply); predicateTree.Children.emplace_back(child); predicateTree.CanBePushed &= child.CanBePushed; } } -} - -void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) { - if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) { - if (predicate.Maybe<TCoIf>() || predicate.Maybe<TCoJust>() || predicate.Maybe<TCoCoalesce>()) { - return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody); - } - } +} // namespace +void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) { if (predicate.Maybe<TCoNot>() || predicate.Maybe<TCoAnd>() || predicate.Maybe<TCoOr>() || predicate.Maybe<TCoXor>()) { - return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody); + return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, allowOlapApply); } else if (const auto maybeCoalesce = predicate.Maybe<TCoCoalesce>()) { - predicateTree.CanBePushed = CoalesceCanBePushed(maybeCoalesce.Cast(), lambdaArg, lambdaBody); + predicateTree.CanBePushed = CoalesceCanBePushed(maybeCoalesce.Cast(), lambdaArg, lambdaBody, allowOlapApply); } else if (const auto maybeCompare = predicate.Maybe<TCoCompare>()) { - predicateTree.CanBePushed = CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody); + predicateTree.CanBePushed = CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, allowOlapApply); } else if (const auto maybeExists = predicate.Maybe<TCoExists>()) { predicateTree.CanBePushed = ExistsCanBePushed(maybeExists.Cast(), lambdaArg); } else if (const auto maybeJsonExists = predicate.Maybe<TCoJsonExists>()) { predicateTree.CanBePushed = JsonExistsCanBePushed(maybeJsonExists.Cast(), lambdaArg); - } else { - if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) { - predicateTree.CanBePushed = AbstractTreeCanBePushed(predicate, lambdaArg); - } else { - predicateTree.CanBePushed = false; + } + if (predicateTree.CanBePushed) { + return; + } else if (allowOlapApply){ + if (predicate.Maybe<TCoIf>() || predicate.Maybe<TCoJust>() || predicate.Maybe<TCoCoalesce>()) { + return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, allowOlapApply); } + predicateTree.CanBePushed = AbstractTreeCanBePushed(predicate, lambdaArg); } } - -} +} //namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/predicate_collector.h b/ydb/core/kqp/opt/physical/predicate_collector.h index d6663c9d6a..f65ac2ebdd 100644 --- a/ydb/core/kqp/opt/physical/predicate_collector.h +++ b/ydb/core/kqp/opt/physical/predicate_collector.h @@ -15,6 +15,6 @@ struct TOLAPPredicateNode { } }; -void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody); +void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody, bool allowOlapApply); } diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp index a6910105f6..b842b9932a 100644 --- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp +++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp @@ -1189,26 +1189,19 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsonval, "$.col1") = "val1" AND id = 1; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply"); -#else .AddExpectedPlanOptions("KqpOlapJsonValue") .SetExpectedReply(R"([[1;["val1"];#]])"); -#endif TestTableWithNulls({testCase}); } + Y_UNIT_TEST(Json_GetValue_Minus) { TAggregationTestCase testCase; testCase.SetQuery(R"( SELECT id, JSON_VALUE(jsonval, "$.'col-abc'"), JSON_VALUE(jsondoc, "$.'col-abc'") FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsonval, "$.'col-abc'") = "val-abc" AND id = 1; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonValue") -#endif .SetExpectedReply(R"([[1;["val-abc"];#]])"); TestTableWithNulls({testCase}); @@ -1220,11 +1213,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { SELECT id, JSON_VALUE(jsonval, "$.col1" RETURNING String), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsonval, "$.col1" RETURNING String) = "val1" AND id = 1; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonValue") -#endif .SetExpectedReply(R"([[1;["val1"];#]])"); TestTableWithNulls({ testCase }); @@ -1236,11 +1225,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 1; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonValue") -#endif .SetExpectedReply(R"([[1;[16];#]])"); TestTableWithNulls({ testCase }); @@ -1252,11 +1237,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsondoc, "$.col1") = "val1" AND id = 6; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonValue") -#endif .SetExpectedReply(R"([[6;#;["val1"]]])"); TestTableWithNulls({ testCase }); @@ -1268,11 +1249,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1" RETURNING String) FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsondoc, "$.col1" RETURNING String) = "val1" AND id = 6; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonValue") -#endif .SetExpectedReply(R"([[6;#;["val1"]]])"); TestTableWithNulls({ testCase }); @@ -1284,11 +1261,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int"), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls` WHERE JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 6; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonValue") -#endif .SetExpectedReply(R"([[6;#;[16]]])"); TestTableWithNulls({ testCase }); @@ -1301,11 +1274,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { WHERE JSON_EXISTS(jsonval, "$.col1") AND level = 1; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonExists") -#endif .SetExpectedReply(R"([[1;[%true];#]])"); TestTableWithNulls({ testCase }); @@ -1318,11 +1287,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { WHERE JSON_EXISTS(jsondoc, "$.col1") AND id = 6; )") -#if SSA_RUNTIME_VERSION >= 5U - .AddExpectedPlanOptions("KqpOlapApply") -#else .AddExpectedPlanOptions("KqpOlapJsonExists") -#endif .SetExpectedReply(R"([[6;#;[%true]]])"); TestTableWithNulls({ testCase }); @@ -1343,6 +1308,26 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { TestTableWithNulls({ testCase }); } + Y_UNIT_TEST(MixedJsonAndOlapApply) { + TAggregationTestCase testCase; + //(R"({"col1": "val1", "col-abc": "val-abc", "obj": {"obj_col2_int": 16}})" + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.\"col-abc\"") FROM `/Root/tableWithNulls` + WHERE id = 1 + AND JSON_VALUE(jsonval, "$.col1") = "val1" + AND JSON_VALUE(jsonval, "$.\"col-abc\"") ilike "%A%b%" + AND JSON_EXISTS(jsonval, "$.obj.obj_col2_int") + + )") + .AddExpectedPlanOptions("KqpOlapJsonValue") + .AddExpectedPlanOptions("KqpOlapJsonExists") + .AddExpectedPlanOptions("KqpOlapApply") + .SetExpectedReply(R"([[1;["val-abc"]]])") + ; + + TestTableWithNulls({testCase}); + } + Y_UNIT_TEST(BlockGenericWithDistinct) { TAggregationTestCase testCase; testCase.SetQuery(R"( diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 6116dbed18..61f8b0f38a 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1452,9 +1452,15 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto result = CollectStreamResult(it); auto ast = result.QueryStats->Getquery_ast(); UNIT_ASSERT_C(ast.find(R"(('eq '"resource_id")") != std::string::npos, - TStringBuilder() << "Predicate not pushed down. Query: " << query); - UNIT_ASSERT_C(ast.find(R"(('gt '"level")") == std::string::npos, - TStringBuilder() << "Predicate pushed down. Query: " << query); + TStringBuilder() << "Subpredicate is not pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find(R"(('gt '"level")") != std::string::npos, + TStringBuilder() << "Subpredicate is not pushed down. Query: " << query); + //This check is based on an assumpltion, that for pushed down predicates column names are preserved in AST + //But for non-pushed down predicates column names are (usually) replaced with a label, started with $. It' not a rule, but a heuristic + //So this check may require a correction when some ast optimization rules are changed + UNIT_ASSERT_C(ast.find(R"((Unwrap (/ $)") != std::string::npos, + TStringBuilder() << "Unsafe subpredicate is pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, TStringBuilder() << "NarrowMap was removed. Query: " << query); } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 index a87ce4a6ce..600be0f4a4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 @@ -107,7 +107,7 @@ } ], "Name": "Filter", - "Predicate": "IsRefresh == 0 AND TraficSourceID == -1 OR TraficSourceID == 6 AND RefererHash == 3594120000172545465", + "Predicate": "IsRefresh == 0 AND RefererHash == 3594120000172545465 AND TraficSourceID == -1 OR TraficSourceID == 6", "Pushdown": "True" }, { @@ -169,7 +169,7 @@ "Id": 108 }, "Constant": { - "Int32": -1 + "Int64": 3594120000172545465 } } }, @@ -181,7 +181,7 @@ "Function": { "Arguments": [ { - "Id": 38 + "Id": 103 }, { "Id": 108 @@ -199,7 +199,7 @@ "Id": 110 }, "Constant": { - "Int32": 6 + "Int32": -1 } } }, @@ -228,28 +228,28 @@ "Column": { "Id": 112 }, + "Constant": { + "Int32": 6 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, "Function": { "Arguments": [ { - "Id": 109 + "Id": 38 }, { - "Id": 111 + "Id": 112 } ], "FunctionType": 2, "KernelIdx": 3, - "YqlOperationId": 1 - } - } - }, - { - "Assign": { - "Column": { - "Id": 113 - }, - "Constant": { - "Int64": 3594120000172545465 + "YqlOperationId": 11 } } }, @@ -261,7 +261,7 @@ "Function": { "Arguments": [ { - "Id": 103 + "Id": 111 }, { "Id": 113 @@ -269,7 +269,7 @@ ], "FunctionType": 2, "KernelIdx": 4, - "YqlOperationId": 11 + "YqlOperationId": 1 } } }, @@ -281,7 +281,7 @@ "Function": { "Arguments": [ { - "Id": 112 + "Id": 109 }, { "Id": 114 |