Do not use OlapApply for known functions pushdown (#15055)

author: zverevgeny <zverevgeny@ydb.tech> 2025-03-04 09:19:31 +0300
committer: GitHub <noreply@github.com> 2025-03-04 09:19:31 +0300
commit: 3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8 (patch)
tree: 3755a1eae363516c01b910f120ef03898fa0c194
parent: 80dee287373544fb2fab855727ee54e1daa50e5a (diff)
download: ydb-3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8.tar.gz
6 files changed, 205 insertions, 201 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
index 77e9170c63..5ec99a88ef 100644
--- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
+++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
@@ -24,6 +24,32 @@ static const std::unordered_set<std::string> SecondLevelFilters = {
     "ends_with"
 };
 
+static TMaybeNode<TExprBase> CombinePredicatesWithAnd(const TVector<TExprBase>& conjuncts, TExprContext& ctx, TPositionHandle pos, bool useOlapAnd, bool trueForEmpty) {
+    if (conjuncts.empty()) {
+        return trueForEmpty ? TMaybeNode<TExprBase>{MakeBool<true>(pos, ctx)} : TMaybeNode<TExprBase>{};
+    } else if (conjuncts.size() == 1) {
+        return conjuncts[0];
+    } else {
+        if (useOlapAnd) {
+            return Build<TKqpOlapAnd>(ctx, pos)
+                .Add(conjuncts)
+            .Done();
+        } else {
+            return Build<TCoAnd>(ctx, pos)
+                .Add(conjuncts)
+            .Done();
+        }
+    }
+}
+
+static TMaybeNode<TExprBase> CombinePredicatesWithAnd(const TVector<TOLAPPredicateNode>& conjuncts, TExprContext& ctx, TPositionHandle pos, bool useOlapAnd, bool trueForEmpty) {
+    TVector<TExprBase> exprs;
+    for(const auto& c: conjuncts) {
+        exprs.emplace_back(c.ExprNode);
+    }
+    return CombinePredicatesWithAnd(exprs, ctx, pos, useOlapAnd, trueForEmpty);
+}
+
 struct TFilterOpsLevels {
     TFilterOpsLevels(const TMaybeNode<TExprBase>& firstLevel, const TMaybeNode<TExprBase>& secondLevel)
         : FirstLevelOps(firstLevel)
@@ -69,6 +95,23 @@ struct TFilterOpsLevels {
     }
 
 
+    static TFilterOpsLevels Merge(TVector<TFilterOpsLevels> predicates, TExprContext& ctx, TPositionHandle pos) {
+        TVector<TExprBase> predicatesFirstLevel;
+        TVector<TExprBase> predicatesSecondLevel;
+        for (const auto& p: predicates) {
+            if (p.FirstLevelOps.IsValid()) {
+                predicatesFirstLevel.emplace_back(p.FirstLevelOps.Cast());
+            }
+            if (p.SecondLevelOps.IsValid()) {
+                predicatesSecondLevel.emplace_back(p.SecondLevelOps.Cast());
+            }
+        }
+        return {
+            CombinePredicatesWithAnd(predicatesFirstLevel, ctx, pos, true, false),
+            CombinePredicatesWithAnd(predicatesSecondLevel, ctx, pos, true, false),
+        };
+    }
+
     TMaybeNode<TExprBase> FirstLevelOps;
     TMaybeNode<TExprBase> SecondLevelOps;
 };
@@ -262,7 +305,6 @@ TMaybeNode<TExprBase> SafeCastPredicatePushdown(const TCoFlatMap& inputFlatmap,
 
 std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, const TExprNode& argument, TExprContext& ctx, TPositionHandle pos)
 {
-    std::vector<TExprBase> out;
     const auto convertNode = [&ctx, &pos, &argument](const TExprBase& node) -> TMaybeNode<TExprBase> {
         if (node.Maybe<TCoNull>()) {
             return node;
@@ -368,36 +410,27 @@ std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, const TExp
         }
     };
 
-    // Columns & values may be single element
-    TMaybeNode<TExprBase> node = convertNode(nodeIn);
-
-    if (node.IsValid()) {
-        out.emplace_back(std::move(node.Cast()));
-        return out;
-    }
-
-    // Or columns and values can be Tuple
-    if (!nodeIn.Maybe<TExprList>()) {
-        // something unusual found, return empty vector
-        return out;
-    }
+    if (const auto& list = nodeIn.Maybe<TExprList>()) {
+        const auto& tuple = list.Cast();
+        std::vector<TExprBase> out;
 
-    auto tuple = nodeIn.Cast<TExprList>();
+        out.reserve(tuple.Size());
+        for (ui32 i = 0; i < tuple.Size(); ++i) {
+            TMaybeNode<TExprBase> node = convertNode(tuple.Item(i));
 
-    out.reserve(tuple.Size());
-
-    for (ui32 i = 0; i < tuple.Size(); ++i) {
-        TMaybeNode<TExprBase> node = convertNode(tuple.Item(i));
+            if (!node.IsValid()) {
+                // Return empty vector
+                return TVector<TExprBase>();
+            }
 
-        if (!node.IsValid()) {
-            // Return empty vector
-            return TVector<TExprBase>();
+            out.emplace_back(node.Cast());
         }
-
-        out.emplace_back(node.Cast());
+        return out;
+    } else if (const auto& node = convertNode(nodeIn); node.IsValid()) {
+        return {node.Cast()};
+    } else {
+        return {};
     }
-
-    return out;
 }
 
 TExprBase BuildOneElementComparison(const std::pair<TExprBase, TExprBase>& parameter, const TCoCompare& predicate,
@@ -663,50 +696,22 @@ TFilterOpsLevels PredicatePushdown(const TExprBase& predicate, const TExprNode&
     return YqlApplyPushdown(predicate, argument, ctx);
 }
 
-TOLAPPredicateNode WrapPredicates(const std::vector<TOLAPPredicateNode>& predicates, TExprContext& ctx, TPositionHandle pos) {
-    if (predicates.empty()) {
-        return {};
-    }
-
-    if (const auto predicatesSize = predicates.size(); 1U == predicatesSize) {
-        return predicates.front();
-    } else {
-        TOLAPPredicateNode result;
-        result.Children = predicates;
-        result.CanBePushed = true;
-
-        TVector<NNodes::TExprBase> exprNodes;
-        exprNodes.reserve(predicatesSize);
-        for (const auto& pred : predicates) {
-            exprNodes.emplace_back(pred.ExprNode);
-            result.CanBePushed &= pred.CanBePushed;
-        }
-        result.ExprNode = NNodes::Build<NNodes::TCoAnd>(ctx, pos)
-            .Add(exprNodes)
-            .Done().Ptr();
-        return result;
-    }
-}
-
-void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredicateNode& predicatesToPush, TOLAPPredicateNode& remainingPredicates,
-    TExprContext& ctx, TPositionHandle pos)
+std::pair<TVector<TOLAPPredicateNode>, TVector<TOLAPPredicateNode>> SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree)
 {
     if (predicateTree.CanBePushed) {
-        predicatesToPush = predicateTree;
-        remainingPredicates.ExprNode = MakeBool<true>(pos, ctx);
-        return;
+        return {{predicateTree}, {}};
     }
 
     if (!TCoAnd::Match(predicateTree.ExprNode.Get())) {
         // We can partially pushdown predicates from AND operator only.
         // For OR operator we would need to have several read operators which is not acceptable.
         // TODO: Add support for NOT(op1 OR op2), because it expands to (!op1 AND !op2).
-        remainingPredicates = predicateTree;
-        return;
+        return {{}, {predicateTree}};
     }
 
     bool isFoundNotStrictOp = false;
-    std::vector<TOLAPPredicateNode> pushable, remaining;
+    TVector<TOLAPPredicateNode> pushable;
+    TVector<TOLAPPredicateNode> remaining;
     for (const auto& predicate : predicateTree.Children) {
         if (predicate.CanBePushed && !isFoundNotStrictOp) {
             pushable.emplace_back(predicate);
@@ -717,8 +722,7 @@ void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredi
             remaining.emplace_back(predicate);
         }
     }
-    predicatesToPush = WrapPredicates(pushable, ctx, pos);
-    remainingPredicates = WrapPredicates(remaining, ctx, pos);
+    return {pushable, remaining};
 }
 
 } // anonymous namespace end
@@ -742,6 +746,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
     }
 
     const auto& lambda = flatmap.Lambda();
+    const auto& lambdaArg = lambda.Args().Arg(0).Ref();
 
     YQL_CLOG(TRACE, ProviderKqp) << "Initial OLAP lambda: " << KqpExprToPrettyString(lambda, ctx);
 
@@ -754,55 +759,68 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
     auto predicate = optionaIf.Predicate();
     auto value = optionaIf.Value();
 
-    if constexpr (NSsa::RuntimeVersion >= 5U) {
-        TExprNode::TPtr afterPeephole;
-        bool hasNonDeterministicFunctions;
-        if (const auto status = PeepHoleOptimizeNode(optionaIf.Ptr(), afterPeephole, ctx, typesCtx, nullptr, hasNonDeterministicFunctions);
-            status != IGraphTransformer::TStatus::Ok) {
-            YQL_CLOG(ERROR, ProviderKqp) << "Peephole OLAP failed." << Endl << ctx.IssueManager.GetIssues().ToString();
-            return node;
-        }
-
-        const TCoIf simplified(std::move(afterPeephole));
-        predicate = simplified.Predicate();
-        value = simplified.ThenValue().Cast<TCoJust>().Input();
-    }
-
     TOLAPPredicateNode predicateTree;
     predicateTree.ExprNode = predicate.Ptr();
-    const auto& lambdaArg = lambda.Args().Arg(0).Ref();
-    CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body());
+    CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body(), false);
     YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid");
 
-    TOLAPPredicateNode predicatesToPush, remainingPredicates;
-    SplitForPartialPushdown(predicateTree, predicatesToPush, remainingPredicates, ctx, node.Pos());
-    if (!predicatesToPush.IsValid()) {
-        return node;
+    auto [pushable, remaining] = SplitForPartialPushdown(predicateTree);
+    TVector<TFilterOpsLevels> pushedPredicates;
+    for (const auto& p: pushable) {
+        pushedPredicates.emplace_back(PredicatePushdown(TExprBase(p.ExprNode), lambdaArg, ctx, node.Pos()));
     }
 
-    YQL_ENSURE(predicatesToPush.IsValid(), "Predicates to push is invalid");
-    YQL_ENSURE(remainingPredicates.IsValid(), "Remaining predicates is invalid");
-
-    const auto pushedFilters = PredicatePushdown(TExprBase(predicatesToPush.ExprNode), lambdaArg, ctx, node.Pos());
-    // Temporary fix for https://st.yandex-team.ru/KIKIMR-22560
-    // YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!");
-
-    if (!pushedFilters.IsValid()) {
+    if constexpr (NSsa::RuntimeVersion >= 5U) {
+        TVector<TOLAPPredicateNode> remainingAfterApply;
+        for(const auto& p: remaining) {
+            const auto recoveredOptinalIfForNonPushedDownPredicates = Build<TCoOptionalIf>(ctx, node.Pos())
+                .Predicate(p.ExprNode)
+                .Value(value)
+            .Build();
+            TExprNode::TPtr afterPeephole;
+            bool hasNonDeterministicFunctions;
+            if (const auto status = PeepHoleOptimizeNode(recoveredOptinalIfForNonPushedDownPredicates.Value().Ptr(), afterPeephole, ctx, typesCtx, nullptr, hasNonDeterministicFunctions);
+                status != IGraphTransformer::TStatus::Ok) {
+                YQL_CLOG(ERROR, ProviderKqp) << "Peephole OLAP failed." << Endl << ctx.IssueManager.GetIssues().ToString();
+                return node;
+            }
+            const TCoIf simplified(std::move(afterPeephole));
+            predicate = simplified.Predicate();
+            value = simplified.ThenValue().Cast<TCoJust>().Input();
+
+            TOLAPPredicateNode predicateTree;
+            predicateTree.ExprNode = predicate.Ptr();
+            CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body(), true);
+            YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid");
+            auto [pushableWithApply, remaining] = SplitForPartialPushdown(predicateTree);
+            for (const auto& p: pushableWithApply) {
+               pushedPredicates.emplace_back(PredicatePushdown(TExprBase(p.ExprNode), lambdaArg, ctx, node.Pos()));
+            }
+            remainingAfterApply.insert(remainingAfterApply.end(), remaining.begin(), remaining.end());
+        }
+        remaining = std::move(remainingAfterApply);
+    }
+    
+    if (pushedPredicates.empty()) {
         return node;
     }
 
+    const auto& pushedFilter = TFilterOpsLevels::Merge(pushedPredicates, ctx, node.Pos());
+
+    const auto remainingFilter = CombinePredicatesWithAnd(remaining, ctx, node.Pos(), false, true);
+
     TMaybeNode<TExprBase> olapFilter;
-    if (pushedFilters.FirstLevelOps.IsValid()) {    
+    if (pushedFilter.FirstLevelOps.IsValid()) {    
         olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos())
             .Input(read.Process().Body())
-            .Condition(pushedFilters.FirstLevelOps.Cast())
+            .Condition(pushedFilter.FirstLevelOps.Cast())
             .Done();
     }
 
-    if (pushedFilters.SecondLevelOps.IsValid()) {
+    if (pushedFilter.SecondLevelOps.IsValid()) {
         olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos())
             .Input(olapFilter.IsValid() ? olapFilter.Cast() : read.Process().Body())
-            .Condition(pushedFilters.SecondLevelOps.Cast())
+            .Condition(pushedFilter.SecondLevelOps.Cast())
             .Done();
     }
 
@@ -846,7 +864,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
             .Args({"new_arg"})
             .Body<TCoOptionalIf>()
                 .Predicate<TExprApplier>()
-                    .Apply(TExprBase(remainingPredicates.ExprNode))
+                    .Apply(remainingFilter.Cast())
                     .With(lambda.Args().Arg(0), "new_arg")
                     .Build()
                 .Value<TExprApplier>()
diff --git a/ydb/core/kqp/opt/physical/predicate_collector.cpp b/ydb/core/kqp/opt/physical/predicate_collector.cpp
index bf17becf1f..5e911f1de1 100644
--- a/ydb/core/kqp/opt/physical/predicate_collector.cpp
+++ b/ydb/core/kqp/opt/physical/predicate_collector.cpp
@@ -15,7 +15,7 @@ bool IsSupportedPredicate(const TCoCompare& predicate) {
     return !predicate.Ref().Content().starts_with("Aggr");
 }
 
-bool IsSupportedDataType(const TCoDataCtor& node) {
+bool IsSupportedDataType(const TCoDataCtor& node, bool allowOlapApply) {
     if (node.Maybe<TCoBool>() ||
         node.Maybe<TCoFloat>() ||
         node.Maybe<TCoDouble>() ||
@@ -36,7 +36,7 @@ bool IsSupportedDataType(const TCoDataCtor& node) {
         return true;
     }
 
-    if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
+    if (allowOlapApply) {
         if (node.Maybe<TCoDate32>() ||  node.Maybe<TCoDatetime64>() || node.Maybe<TCoTimestamp64>() || node.Maybe<TCoInterval64>()) {
             return true;
         }
@@ -86,17 +86,17 @@ bool IsMemberColumn(const TExprBase& node, const TExprNode* lambdaArg) {
     return false;
 }
 
-bool IsGoodTypeForArithmeticPushdown(const TTypeAnnotationNode& type) {
+bool IsGoodTypeForArithmeticPushdown(const TTypeAnnotationNode& type, bool allowOlapApply) {
     const auto fatures = NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features;
     return NUdf::EDataTypeFeatures::NumericType & fatures
-        || (NKikimr::NSsa::RuntimeVersion >= 5U && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures));
+        || (allowOlapApply && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures));
 }
 
-bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type) {
+bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type, bool allowOlapApply) {
     const auto fatures = NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features;
     return (NUdf::EDataTypeFeatures::CanCompare  & fatures)
         && (((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fatures) ||
-            (NKikimr::NSsa::RuntimeVersion >= 5U && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures)));
+            (allowOlapApply && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures)));
 }
 
 [[maybe_unused]]
@@ -139,8 +139,8 @@ bool AbstractTreeCanBePushed(const TExprBase& expr, const TExprNode* ) {
     return true;
 }
 
-bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg) {
-    if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
+bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg, bool allowOlapApply) {
+    if (allowOlapApply) {
         if (node.Maybe<TCoJust>() || node.Maybe<TCoCoalesce>()) {
             return true;
         }
@@ -156,7 +156,7 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
     if (const auto maybeSafeCast = node.Maybe<TCoSafeCast>()) {
         return IsSupportedCast(maybeSafeCast.Cast());
     } else if (const auto maybeData = node.Maybe<TCoDataCtor>()) {
-        return IsSupportedDataType(maybeData.Cast());
+        return IsSupportedDataType(maybeData.Cast(), allowOlapApply);
     } else if (const auto maybeMember = node.Maybe<TCoMember>()) {
         return IsMemberColumn(maybeMember.Cast(), lambdaArg);
     } else if (const auto maybeJsonValue = node.Maybe<TCoJsonValue>()) {
@@ -167,20 +167,20 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
     }
 
     if (const auto op = node.Maybe<TCoUnaryArithmetic>()) {
-        return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg) && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn());
+        return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg, allowOlapApply) && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn(), allowOlapApply);
     } else if (const auto op = node.Maybe<TCoBinaryArithmetic>()) {
-        return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg)
-            && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn()) && !op.Cast().Maybe<TCoAggrAdd>();
+        return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg, allowOlapApply) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg, allowOlapApply)
+            && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn(), allowOlapApply) && !op.Cast().Maybe<TCoAggrAdd>();
     }
 
-    if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
+    if (allowOlapApply) {
         return AbstractTreeCanBePushed(node, lambdaArg);
     }
 
     return false;
 }
 
-bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo) {
+bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo, bool allowOlapApply) {
     const auto& rawOne = RemoveOptionality(typeOne);
     const auto& rawTwo = RemoveOptionality(typeTwo);
     if (IsSameAnnotation(rawOne, rawTwo))
@@ -202,21 +202,21 @@ bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTy
             if (size != itemsTwo.size())
                 return false;
             for (auto i = 0U; i < size; ++i) {
-                if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i])) {
+                if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i], allowOlapApply)) {
                     return false;
                 }
             }
             return true;
         }
         case ETypeAnnotationKind::Data:
-            return IsGoodTypeForComparsionPushdown(typeOne) && IsGoodTypeForComparsionPushdown(typeTwo);
+            return IsGoodTypeForComparsionPushdown(typeOne, allowOlapApply) && IsGoodTypeForComparsionPushdown(typeTwo, allowOlapApply);
         default:
             break;
     }
     return false;
 }
 
-bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input) {
+bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input, bool allowOlapApply) {
     const auto* inputType = input.Ref().GetTypeAnn();
     switch (inputType->GetKind()) {
         case ETypeAnnotationKind::Flow:
@@ -239,7 +239,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
         return false;
     }
 
-    if (!IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn())) {
+    if (!IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn(), allowOlapApply)) {
         return false;
     }
 
@@ -248,7 +248,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
     YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
 
     for (size_t i = 0; i < leftList.size(); ++i) {
-        if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg)) {
+        if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, allowOlapApply) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, allowOlapApply)) {
             return false;
         }
     }
@@ -256,11 +256,11 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
     return true;
 }
 
-bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
-    return IsSupportedPredicate(compare) && CheckComparisonParametersForPushdown(compare, lambdaArg, lambdaBody);
+bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
+    return IsSupportedPredicate(compare) && CheckComparisonParametersForPushdown(compare, lambdaArg, lambdaBody, allowOlapApply);
 }
 
-bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg) {
+bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg, bool allowOlapApply) {
     /*
      * There are three ways of comparison in following format:
      *
@@ -281,7 +281,7 @@ bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg)
     YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
 
     for (size_t i = 0; i < leftList.size(); ++i) {
-        if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg)) {
+        if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, allowOlapApply) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, allowOlapApply)) {
             return false;
         }
     }
@@ -315,16 +315,16 @@ bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprNode* lam
     return true;
 }
 
-bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
+bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
     if (!coalesce.Value().Maybe<TCoBool>()) {
         return false;
     }
 
     const auto predicate = coalesce.Predicate();
     if (const auto maybeCompare = predicate.Maybe<TCoCompare>()) {
-        return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody);
+        return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, allowOlapApply);
     } else if (const auto maybeFlatmap = predicate.Maybe<TCoFlatMap>()) {
-        return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg);
+        return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg, allowOlapApply);
     } else if (const auto maybeJsonExists = predicate.Maybe<TCoJsonExists>()) {
         return JsonExistsCanBePushed(maybeJsonExists.Cast(), lambdaArg);
     }
@@ -336,47 +336,42 @@ bool ExistsCanBePushed(const TCoExists& exists, const TExprNode* lambdaArg) {
     return IsMemberColumn(exists.Optional(), lambdaArg);
 }
 
-void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
+void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
     predicateTree.Children.reserve(opNode.ChildrenSize());
     predicateTree.CanBePushed = true;
     for (const auto& childNodePtr: opNode.Children()) {
         TOLAPPredicateNode child;
         child.ExprNode = childNodePtr;
         if (const auto maybeCtor = TMaybeNode<TCoDataCtor>(child.ExprNode))
-            child.CanBePushed = IsSupportedDataType(maybeCtor.Cast());
+            child.CanBePushed = IsSupportedDataType(maybeCtor.Cast(), allowOlapApply);
         else
-            CollectPredicates(TExprBase(child.ExprNode), child, lambdaArg, lambdaBody);
+            CollectPredicates(TExprBase(child.ExprNode), child, lambdaArg, lambdaBody, allowOlapApply);
         predicateTree.Children.emplace_back(child);
         predicateTree.CanBePushed &= child.CanBePushed;
     }
 }
 
-}
-
-void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
-    if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
-        if (predicate.Maybe<TCoIf>() || predicate.Maybe<TCoJust>() || predicate.Maybe<TCoCoalesce>()) {
-            return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody);
-        }
-    }
+} // namespace
 
+void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
     if (predicate.Maybe<TCoNot>() || predicate.Maybe<TCoAnd>() || predicate.Maybe<TCoOr>() || predicate.Maybe<TCoXor>()) {
-        return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody);
+        return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, allowOlapApply);
     } else if (const auto maybeCoalesce = predicate.Maybe<TCoCoalesce>()) {
-        predicateTree.CanBePushed = CoalesceCanBePushed(maybeCoalesce.Cast(), lambdaArg, lambdaBody);
+        predicateTree.CanBePushed = CoalesceCanBePushed(maybeCoalesce.Cast(), lambdaArg, lambdaBody, allowOlapApply);
     } else if (const auto maybeCompare = predicate.Maybe<TCoCompare>()) {
-        predicateTree.CanBePushed = CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody);
+        predicateTree.CanBePushed = CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, allowOlapApply);
     } else if (const auto maybeExists = predicate.Maybe<TCoExists>()) {
         predicateTree.CanBePushed = ExistsCanBePushed(maybeExists.Cast(), lambdaArg);
     } else if (const auto maybeJsonExists = predicate.Maybe<TCoJsonExists>()) {
         predicateTree.CanBePushed = JsonExistsCanBePushed(maybeJsonExists.Cast(), lambdaArg);
-    } else {
-        if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
-            predicateTree.CanBePushed = AbstractTreeCanBePushed(predicate, lambdaArg);
-        } else {
-            predicateTree.CanBePushed = false;
+    }
+    if (predicateTree.CanBePushed) {
+        return;
+    } else if (allowOlapApply){
+        if (predicate.Maybe<TCoIf>() || predicate.Maybe<TCoJust>() || predicate.Maybe<TCoCoalesce>()) {
+            return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, allowOlapApply);
         }
+        predicateTree.CanBePushed =  AbstractTreeCanBePushed(predicate, lambdaArg);
     }
 }
-
-}
+} //namespace NKikimr::NKqp::NOpt
diff --git a/ydb/core/kqp/opt/physical/predicate_collector.h b/ydb/core/kqp/opt/physical/predicate_collector.h
index d6663c9d6a..f65ac2ebdd 100644
--- a/ydb/core/kqp/opt/physical/predicate_collector.h
+++ b/ydb/core/kqp/opt/physical/predicate_collector.h
@@ -15,6 +15,6 @@ struct TOLAPPredicateNode {
     }
 };
 
-void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody);
+void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody, bool allowOlapApply);
 
 }
diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp
index a6910105f6..b842b9932a 100644
--- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp
+++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp
@@ -1189,26 +1189,19 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsonval, "$.col1") = "val1" AND id = 1;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply");
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
             .SetExpectedReply(R"([[1;["val1"];#]])");
-#endif
         TestTableWithNulls({testCase});
     }
 
+
     Y_UNIT_TEST(Json_GetValue_Minus) {
         TAggregationTestCase testCase;
         testCase.SetQuery(R"(
                 SELECT id, JSON_VALUE(jsonval, "$.'col-abc'"), JSON_VALUE(jsondoc, "$.'col-abc'") FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsonval, "$.'col-abc'") = "val-abc" AND id = 1;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
             .SetExpectedReply(R"([[1;["val-abc"];#]])");
 
         TestTableWithNulls({testCase});
@@ -1220,11 +1213,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 SELECT id, JSON_VALUE(jsonval, "$.col1" RETURNING String), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsonval, "$.col1" RETURNING String) = "val1" AND id = 1;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
             .SetExpectedReply(R"([[1;["val1"];#]])");
 
         TestTableWithNulls({ testCase });
@@ -1236,11 +1225,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 1;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
             .SetExpectedReply(R"([[1;[16];#]])");
 
         TestTableWithNulls({ testCase });
@@ -1252,11 +1237,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsondoc, "$.col1") = "val1" AND id = 6;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
             .SetExpectedReply(R"([[6;#;["val1"]]])");
 
         TestTableWithNulls({ testCase });
@@ -1268,11 +1249,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1" RETURNING String) FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsondoc, "$.col1" RETURNING String) = "val1" AND id = 6;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
             .SetExpectedReply(R"([[6;#;["val1"]]])");
 
         TestTableWithNulls({ testCase });
@@ -1284,11 +1261,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int"), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls`
                 WHERE JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 6;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
             .SetExpectedReply(R"([[6;#;[16]]])");
 
         TestTableWithNulls({ testCase });
@@ -1301,11 +1274,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 WHERE
                     JSON_EXISTS(jsonval, "$.col1") AND level = 1;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonExists")
-#endif
             .SetExpectedReply(R"([[1;[%true];#]])");
 
         TestTableWithNulls({ testCase });
@@ -1318,11 +1287,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
                 WHERE
                     JSON_EXISTS(jsondoc, "$.col1") AND id = 6;
             )")
-#if SSA_RUNTIME_VERSION >= 5U
-            .AddExpectedPlanOptions("KqpOlapApply")
-#else
             .AddExpectedPlanOptions("KqpOlapJsonExists")
-#endif
             .SetExpectedReply(R"([[6;#;[%true]]])");
 
         TestTableWithNulls({ testCase });
@@ -1343,6 +1308,26 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
         TestTableWithNulls({ testCase });
     }
 
+    Y_UNIT_TEST(MixedJsonAndOlapApply) {
+        TAggregationTestCase testCase;
+        //(R"({"col1": "val1", "col-abc": "val-abc", "obj": {"obj_col2_int": 16}})"
+        testCase.SetQuery(R"(
+                SELECT id, JSON_VALUE(jsonval, "$.\"col-abc\"") FROM `/Root/tableWithNulls`
+                WHERE id = 1
+                    AND  JSON_VALUE(jsonval, "$.col1") = "val1"
+                    AND  JSON_VALUE(jsonval, "$.\"col-abc\"") ilike "%A%b%"
+                    AND JSON_EXISTS(jsonval, "$.obj.obj_col2_int")
+
+            )")
+            .AddExpectedPlanOptions("KqpOlapJsonValue")
+            .AddExpectedPlanOptions("KqpOlapJsonExists")
+            .AddExpectedPlanOptions("KqpOlapApply")
+            .SetExpectedReply(R"([[1;["val-abc"]]])")
+        ;
+
+        TestTableWithNulls({testCase});
+    }
+
     Y_UNIT_TEST(BlockGenericWithDistinct) {
         TAggregationTestCase testCase;
         testCase.SetQuery(R"(
diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
index 6116dbed18..61f8b0f38a 100644
--- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
+++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
@@ -1452,9 +1452,15 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
         auto result = CollectStreamResult(it);
         auto ast = result.QueryStats->Getquery_ast();
         UNIT_ASSERT_C(ast.find(R"(('eq '"resource_id")") != std::string::npos,
-                          TStringBuilder() << "Predicate not pushed down. Query: " << query);
-        UNIT_ASSERT_C(ast.find(R"(('gt '"level")") == std::string::npos,
-                          TStringBuilder() << "Predicate pushed down. Query: " << query);
+                          TStringBuilder() << "Subpredicate is not pushed down. Query: " << query);
+        UNIT_ASSERT_C(ast.find(R"(('gt '"level")") != std::string::npos,
+                          TStringBuilder() << "Subpredicate is not pushed down. Query: " << query);
+        //This check is based on an assumpltion, that for pushed down predicates column names are preserved in AST
+        //But for non-pushed down predicates column names are (usually) replaced with a label, started with $. It' not a rule, but a heuristic
+        //So this check may require a correction when some ast optimization rules are changed
+        UNIT_ASSERT_C(ast.find(R"((Unwrap (/ $)") != std::string::npos, 
+                          TStringBuilder() << "Unsafe subpredicate is pushed down. Query: " << query);
+
         UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos,
                           TStringBuilder() << "NarrowMap was removed. Query: " << query);
     }
diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40
index a87ce4a6ce..600be0f4a4 100644
--- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40
+++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40
@@ -107,7 +107,7 @@
                                                                                             }
                                                                                         ],
                                                                                         "Name": "Filter",
-                                                                                        "Predicate": "IsRefresh == 0 AND TraficSourceID == -1 OR TraficSourceID == 6 AND RefererHash == 3594120000172545465",
+                                                                                        "Predicate": "IsRefresh == 0 AND RefererHash == 3594120000172545465 AND TraficSourceID == -1 OR TraficSourceID == 6",
                                                                                         "Pushdown": "True"
                                                                                     },
                                                                                     {
@@ -169,7 +169,7 @@
                                                                                                             "Id": 108
                                                                                                         },
                                                                                                         "Constant": {
-                                                                                                            "Int32": -1
+                                                                                                            "Int64": 3594120000172545465
                                                                                                         }
                                                                                                     }
                                                                                                 },
@@ -181,7 +181,7 @@
                                                                                                         "Function": {
                                                                                                             "Arguments": [
                                                                                                                 {
-                                                                                                                    "Id": 38
+                                                                                                                    "Id": 103
                                                                                                                 },
                                                                                                                 {
                                                                                                                     "Id": 108
@@ -199,7 +199,7 @@
                                                                                                             "Id": 110
                                                                                                         },
                                                                                                         "Constant": {
-                                                                                                            "Int32": 6
+                                                                                                            "Int32": -1
                                                                                                         }
                                                                                                     }
                                                                                                 },
@@ -228,28 +228,28 @@
                                                                                                         "Column": {
                                                                                                             "Id": 112
                                                                                                         },
+                                                                                                        "Constant": {
+                                                                                                            "Int32": 6
+                                                                                                        }
+                                                                                                    }
+                                                                                                },
+                                                                                                {
+                                                                                                    "Assign": {
+                                                                                                        "Column": {
+                                                                                                            "Id": 113
+                                                                                                        },
                                                                                                         "Function": {
                                                                                                             "Arguments": [
                                                                                                                 {
-                                                                                                                    "Id": 109
+                                                                                                                    "Id": 38
                                                                                                                 },
                                                                                                                 {
-                                                                                                                    "Id": 111
+                                                                                                                    "Id": 112
                                                                                                                 }
                                                                                                             ],
                                                                                                             "FunctionType": 2,
                                                                                                             "KernelIdx": 3,
-                                                                                                            "YqlOperationId": 1
-                                                                                                        }
-                                                                                                    }
-                                                                                                },
-                                                                                                {
-                                                                                                    "Assign": {
-                                                                                                        "Column": {
-                                                                                                            "Id": 113
-                                                                                                        },
-                                                                                                        "Constant": {
-                                                                                                            "Int64": 3594120000172545465
+                                                                                                            "YqlOperationId": 11
                                                                                                         }
                                                                                                     }
                                                                                                 },
@@ -261,7 +261,7 @@
                                                                                                         "Function": {
                                                                                                             "Arguments": [
                                                                                                                 {
-                                                                                                                    "Id": 103
+                                                                                                                    "Id": 111
                                                                                                                 },
                                                                                                                 {
                                                                                                                     "Id": 113
@@ -269,7 +269,7 @@
                                                                                                             ],
                                                                                                             "FunctionType": 2,
                                                                                                             "KernelIdx": 4,
-                                                                                                            "YqlOperationId": 11
+                                                                                                            "YqlOperationId": 1
                                                                                                         }
                                                                                                     }
                                                                                                 },
@@ -281,7 +281,7 @@
                                                                                                         "Function": {
                                                                                                             "Arguments": [
                                                                                                                 {
-                                                                                                                    "Id": 112
+                                                                                                                    "Id": 109
                                                                                                                 },
                                                                                                                 {
                                                                                                                     "Id": 114
author	zverevgeny <zverevgeny@ydb.tech>	2025-03-04 09:19:31 +0300
committer	GitHub <noreply@github.com>	2025-03-04 09:19:31 +0300
commit	3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8 (patch)
tree	3755a1eae363516c01b910f120ef03898fa0c194
parent	80dee287373544fb2fab855727ee54e1daa50e5a (diff)
download	ydb-3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8.tar.gz