aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzverevgeny <zverevgeny@ydb.tech>2025-03-04 09:19:31 +0300
committerGitHub <noreply@github.com>2025-03-04 09:19:31 +0300
commit3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8 (patch)
tree3755a1eae363516c01b910f120ef03898fa0c194
parent80dee287373544fb2fab855727ee54e1daa50e5a (diff)
downloadydb-3a5cfede787f6bf6cc63627bf8bd30d9c0cfd4e8.tar.gz
Do not use OlapApply for known functions pushdown (#15055)
-rw-r--r--ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp206
-rw-r--r--ydb/core/kqp/opt/physical/predicate_collector.cpp89
-rw-r--r--ydb/core/kqp/opt/physical/predicate_collector.h2
-rw-r--r--ydb/core/kqp/ut/olap/aggregations_ut.cpp57
-rw-r--r--ydb/core/kqp/ut/olap/kqp_olap_ut.cpp12
-rw-r--r--ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-4040
6 files changed, 205 insertions, 201 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
index 77e9170c63..5ec99a88ef 100644
--- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
+++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
@@ -24,6 +24,32 @@ static const std::unordered_set<std::string> SecondLevelFilters = {
"ends_with"
};
+static TMaybeNode<TExprBase> CombinePredicatesWithAnd(const TVector<TExprBase>& conjuncts, TExprContext& ctx, TPositionHandle pos, bool useOlapAnd, bool trueForEmpty) {
+ if (conjuncts.empty()) {
+ return trueForEmpty ? TMaybeNode<TExprBase>{MakeBool<true>(pos, ctx)} : TMaybeNode<TExprBase>{};
+ } else if (conjuncts.size() == 1) {
+ return conjuncts[0];
+ } else {
+ if (useOlapAnd) {
+ return Build<TKqpOlapAnd>(ctx, pos)
+ .Add(conjuncts)
+ .Done();
+ } else {
+ return Build<TCoAnd>(ctx, pos)
+ .Add(conjuncts)
+ .Done();
+ }
+ }
+}
+
+static TMaybeNode<TExprBase> CombinePredicatesWithAnd(const TVector<TOLAPPredicateNode>& conjuncts, TExprContext& ctx, TPositionHandle pos, bool useOlapAnd, bool trueForEmpty) {
+ TVector<TExprBase> exprs;
+ for(const auto& c: conjuncts) {
+ exprs.emplace_back(c.ExprNode);
+ }
+ return CombinePredicatesWithAnd(exprs, ctx, pos, useOlapAnd, trueForEmpty);
+}
+
struct TFilterOpsLevels {
TFilterOpsLevels(const TMaybeNode<TExprBase>& firstLevel, const TMaybeNode<TExprBase>& secondLevel)
: FirstLevelOps(firstLevel)
@@ -69,6 +95,23 @@ struct TFilterOpsLevels {
}
+ static TFilterOpsLevels Merge(TVector<TFilterOpsLevels> predicates, TExprContext& ctx, TPositionHandle pos) {
+ TVector<TExprBase> predicatesFirstLevel;
+ TVector<TExprBase> predicatesSecondLevel;
+ for (const auto& p: predicates) {
+ if (p.FirstLevelOps.IsValid()) {
+ predicatesFirstLevel.emplace_back(p.FirstLevelOps.Cast());
+ }
+ if (p.SecondLevelOps.IsValid()) {
+ predicatesSecondLevel.emplace_back(p.SecondLevelOps.Cast());
+ }
+ }
+ return {
+ CombinePredicatesWithAnd(predicatesFirstLevel, ctx, pos, true, false),
+ CombinePredicatesWithAnd(predicatesSecondLevel, ctx, pos, true, false),
+ };
+ }
+
TMaybeNode<TExprBase> FirstLevelOps;
TMaybeNode<TExprBase> SecondLevelOps;
};
@@ -262,7 +305,6 @@ TMaybeNode<TExprBase> SafeCastPredicatePushdown(const TCoFlatMap& inputFlatmap,
std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, const TExprNode& argument, TExprContext& ctx, TPositionHandle pos)
{
- std::vector<TExprBase> out;
const auto convertNode = [&ctx, &pos, &argument](const TExprBase& node) -> TMaybeNode<TExprBase> {
if (node.Maybe<TCoNull>()) {
return node;
@@ -368,36 +410,27 @@ std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, const TExp
}
};
- // Columns & values may be single element
- TMaybeNode<TExprBase> node = convertNode(nodeIn);
-
- if (node.IsValid()) {
- out.emplace_back(std::move(node.Cast()));
- return out;
- }
-
- // Or columns and values can be Tuple
- if (!nodeIn.Maybe<TExprList>()) {
- // something unusual found, return empty vector
- return out;
- }
+ if (const auto& list = nodeIn.Maybe<TExprList>()) {
+ const auto& tuple = list.Cast();
+ std::vector<TExprBase> out;
- auto tuple = nodeIn.Cast<TExprList>();
+ out.reserve(tuple.Size());
+ for (ui32 i = 0; i < tuple.Size(); ++i) {
+ TMaybeNode<TExprBase> node = convertNode(tuple.Item(i));
- out.reserve(tuple.Size());
-
- for (ui32 i = 0; i < tuple.Size(); ++i) {
- TMaybeNode<TExprBase> node = convertNode(tuple.Item(i));
+ if (!node.IsValid()) {
+ // Return empty vector
+ return TVector<TExprBase>();
+ }
- if (!node.IsValid()) {
- // Return empty vector
- return TVector<TExprBase>();
+ out.emplace_back(node.Cast());
}
-
- out.emplace_back(node.Cast());
+ return out;
+ } else if (const auto& node = convertNode(nodeIn); node.IsValid()) {
+ return {node.Cast()};
+ } else {
+ return {};
}
-
- return out;
}
TExprBase BuildOneElementComparison(const std::pair<TExprBase, TExprBase>& parameter, const TCoCompare& predicate,
@@ -663,50 +696,22 @@ TFilterOpsLevels PredicatePushdown(const TExprBase& predicate, const TExprNode&
return YqlApplyPushdown(predicate, argument, ctx);
}
-TOLAPPredicateNode WrapPredicates(const std::vector<TOLAPPredicateNode>& predicates, TExprContext& ctx, TPositionHandle pos) {
- if (predicates.empty()) {
- return {};
- }
-
- if (const auto predicatesSize = predicates.size(); 1U == predicatesSize) {
- return predicates.front();
- } else {
- TOLAPPredicateNode result;
- result.Children = predicates;
- result.CanBePushed = true;
-
- TVector<NNodes::TExprBase> exprNodes;
- exprNodes.reserve(predicatesSize);
- for (const auto& pred : predicates) {
- exprNodes.emplace_back(pred.ExprNode);
- result.CanBePushed &= pred.CanBePushed;
- }
- result.ExprNode = NNodes::Build<NNodes::TCoAnd>(ctx, pos)
- .Add(exprNodes)
- .Done().Ptr();
- return result;
- }
-}
-
-void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredicateNode& predicatesToPush, TOLAPPredicateNode& remainingPredicates,
- TExprContext& ctx, TPositionHandle pos)
+std::pair<TVector<TOLAPPredicateNode>, TVector<TOLAPPredicateNode>> SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree)
{
if (predicateTree.CanBePushed) {
- predicatesToPush = predicateTree;
- remainingPredicates.ExprNode = MakeBool<true>(pos, ctx);
- return;
+ return {{predicateTree}, {}};
}
if (!TCoAnd::Match(predicateTree.ExprNode.Get())) {
// We can partially pushdown predicates from AND operator only.
// For OR operator we would need to have several read operators which is not acceptable.
// TODO: Add support for NOT(op1 OR op2), because it expands to (!op1 AND !op2).
- remainingPredicates = predicateTree;
- return;
+ return {{}, {predicateTree}};
}
bool isFoundNotStrictOp = false;
- std::vector<TOLAPPredicateNode> pushable, remaining;
+ TVector<TOLAPPredicateNode> pushable;
+ TVector<TOLAPPredicateNode> remaining;
for (const auto& predicate : predicateTree.Children) {
if (predicate.CanBePushed && !isFoundNotStrictOp) {
pushable.emplace_back(predicate);
@@ -717,8 +722,7 @@ void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredi
remaining.emplace_back(predicate);
}
}
- predicatesToPush = WrapPredicates(pushable, ctx, pos);
- remainingPredicates = WrapPredicates(remaining, ctx, pos);
+ return {pushable, remaining};
}
} // anonymous namespace end
@@ -742,6 +746,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
}
const auto& lambda = flatmap.Lambda();
+ const auto& lambdaArg = lambda.Args().Arg(0).Ref();
YQL_CLOG(TRACE, ProviderKqp) << "Initial OLAP lambda: " << KqpExprToPrettyString(lambda, ctx);
@@ -754,55 +759,68 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
auto predicate = optionaIf.Predicate();
auto value = optionaIf.Value();
- if constexpr (NSsa::RuntimeVersion >= 5U) {
- TExprNode::TPtr afterPeephole;
- bool hasNonDeterministicFunctions;
- if (const auto status = PeepHoleOptimizeNode(optionaIf.Ptr(), afterPeephole, ctx, typesCtx, nullptr, hasNonDeterministicFunctions);
- status != IGraphTransformer::TStatus::Ok) {
- YQL_CLOG(ERROR, ProviderKqp) << "Peephole OLAP failed." << Endl << ctx.IssueManager.GetIssues().ToString();
- return node;
- }
-
- const TCoIf simplified(std::move(afterPeephole));
- predicate = simplified.Predicate();
- value = simplified.ThenValue().Cast<TCoJust>().Input();
- }
-
TOLAPPredicateNode predicateTree;
predicateTree.ExprNode = predicate.Ptr();
- const auto& lambdaArg = lambda.Args().Arg(0).Ref();
- CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body());
+ CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body(), false);
YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid");
- TOLAPPredicateNode predicatesToPush, remainingPredicates;
- SplitForPartialPushdown(predicateTree, predicatesToPush, remainingPredicates, ctx, node.Pos());
- if (!predicatesToPush.IsValid()) {
- return node;
+ auto [pushable, remaining] = SplitForPartialPushdown(predicateTree);
+ TVector<TFilterOpsLevels> pushedPredicates;
+ for (const auto& p: pushable) {
+ pushedPredicates.emplace_back(PredicatePushdown(TExprBase(p.ExprNode), lambdaArg, ctx, node.Pos()));
}
- YQL_ENSURE(predicatesToPush.IsValid(), "Predicates to push is invalid");
- YQL_ENSURE(remainingPredicates.IsValid(), "Remaining predicates is invalid");
-
- const auto pushedFilters = PredicatePushdown(TExprBase(predicatesToPush.ExprNode), lambdaArg, ctx, node.Pos());
- // Temporary fix for https://st.yandex-team.ru/KIKIMR-22560
- // YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!");
-
- if (!pushedFilters.IsValid()) {
+ if constexpr (NSsa::RuntimeVersion >= 5U) {
+ TVector<TOLAPPredicateNode> remainingAfterApply;
+ for(const auto& p: remaining) {
+ const auto recoveredOptinalIfForNonPushedDownPredicates = Build<TCoOptionalIf>(ctx, node.Pos())
+ .Predicate(p.ExprNode)
+ .Value(value)
+ .Build();
+ TExprNode::TPtr afterPeephole;
+ bool hasNonDeterministicFunctions;
+ if (const auto status = PeepHoleOptimizeNode(recoveredOptinalIfForNonPushedDownPredicates.Value().Ptr(), afterPeephole, ctx, typesCtx, nullptr, hasNonDeterministicFunctions);
+ status != IGraphTransformer::TStatus::Ok) {
+ YQL_CLOG(ERROR, ProviderKqp) << "Peephole OLAP failed." << Endl << ctx.IssueManager.GetIssues().ToString();
+ return node;
+ }
+ const TCoIf simplified(std::move(afterPeephole));
+ predicate = simplified.Predicate();
+ value = simplified.ThenValue().Cast<TCoJust>().Input();
+
+ TOLAPPredicateNode predicateTree;
+ predicateTree.ExprNode = predicate.Ptr();
+ CollectPredicates(predicate, predicateTree, &lambdaArg, read.Process().Body(), true);
+ YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid");
+ auto [pushableWithApply, remaining] = SplitForPartialPushdown(predicateTree);
+ for (const auto& p: pushableWithApply) {
+ pushedPredicates.emplace_back(PredicatePushdown(TExprBase(p.ExprNode), lambdaArg, ctx, node.Pos()));
+ }
+ remainingAfterApply.insert(remainingAfterApply.end(), remaining.begin(), remaining.end());
+ }
+ remaining = std::move(remainingAfterApply);
+ }
+
+ if (pushedPredicates.empty()) {
return node;
}
+ const auto& pushedFilter = TFilterOpsLevels::Merge(pushedPredicates, ctx, node.Pos());
+
+ const auto remainingFilter = CombinePredicatesWithAnd(remaining, ctx, node.Pos(), false, true);
+
TMaybeNode<TExprBase> olapFilter;
- if (pushedFilters.FirstLevelOps.IsValid()) {
+ if (pushedFilter.FirstLevelOps.IsValid()) {
olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos())
.Input(read.Process().Body())
- .Condition(pushedFilters.FirstLevelOps.Cast())
+ .Condition(pushedFilter.FirstLevelOps.Cast())
.Done();
}
- if (pushedFilters.SecondLevelOps.IsValid()) {
+ if (pushedFilter.SecondLevelOps.IsValid()) {
olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos())
.Input(olapFilter.IsValid() ? olapFilter.Cast() : read.Process().Body())
- .Condition(pushedFilters.SecondLevelOps.Cast())
+ .Condition(pushedFilter.SecondLevelOps.Cast())
.Done();
}
@@ -846,7 +864,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
.Args({"new_arg"})
.Body<TCoOptionalIf>()
.Predicate<TExprApplier>()
- .Apply(TExprBase(remainingPredicates.ExprNode))
+ .Apply(remainingFilter.Cast())
.With(lambda.Args().Arg(0), "new_arg")
.Build()
.Value<TExprApplier>()
diff --git a/ydb/core/kqp/opt/physical/predicate_collector.cpp b/ydb/core/kqp/opt/physical/predicate_collector.cpp
index bf17becf1f..5e911f1de1 100644
--- a/ydb/core/kqp/opt/physical/predicate_collector.cpp
+++ b/ydb/core/kqp/opt/physical/predicate_collector.cpp
@@ -15,7 +15,7 @@ bool IsSupportedPredicate(const TCoCompare& predicate) {
return !predicate.Ref().Content().starts_with("Aggr");
}
-bool IsSupportedDataType(const TCoDataCtor& node) {
+bool IsSupportedDataType(const TCoDataCtor& node, bool allowOlapApply) {
if (node.Maybe<TCoBool>() ||
node.Maybe<TCoFloat>() ||
node.Maybe<TCoDouble>() ||
@@ -36,7 +36,7 @@ bool IsSupportedDataType(const TCoDataCtor& node) {
return true;
}
- if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
+ if (allowOlapApply) {
if (node.Maybe<TCoDate32>() || node.Maybe<TCoDatetime64>() || node.Maybe<TCoTimestamp64>() || node.Maybe<TCoInterval64>()) {
return true;
}
@@ -86,17 +86,17 @@ bool IsMemberColumn(const TExprBase& node, const TExprNode* lambdaArg) {
return false;
}
-bool IsGoodTypeForArithmeticPushdown(const TTypeAnnotationNode& type) {
+bool IsGoodTypeForArithmeticPushdown(const TTypeAnnotationNode& type, bool allowOlapApply) {
const auto fatures = NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features;
return NUdf::EDataTypeFeatures::NumericType & fatures
- || (NKikimr::NSsa::RuntimeVersion >= 5U && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures));
+ || (allowOlapApply && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures));
}
-bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type) {
+bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type, bool allowOlapApply) {
const auto fatures = NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features;
return (NUdf::EDataTypeFeatures::CanCompare & fatures)
&& (((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fatures) ||
- (NKikimr::NSsa::RuntimeVersion >= 5U && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures)));
+ (allowOlapApply && (NUdf::EDataTypeFeatures::BigDateType & fatures) && !(NUdf::EDataTypeFeatures::TzDateType & fatures)));
}
[[maybe_unused]]
@@ -139,8 +139,8 @@ bool AbstractTreeCanBePushed(const TExprBase& expr, const TExprNode* ) {
return true;
}
-bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg) {
- if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
+bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg, bool allowOlapApply) {
+ if (allowOlapApply) {
if (node.Maybe<TCoJust>() || node.Maybe<TCoCoalesce>()) {
return true;
}
@@ -156,7 +156,7 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
if (const auto maybeSafeCast = node.Maybe<TCoSafeCast>()) {
return IsSupportedCast(maybeSafeCast.Cast());
} else if (const auto maybeData = node.Maybe<TCoDataCtor>()) {
- return IsSupportedDataType(maybeData.Cast());
+ return IsSupportedDataType(maybeData.Cast(), allowOlapApply);
} else if (const auto maybeMember = node.Maybe<TCoMember>()) {
return IsMemberColumn(maybeMember.Cast(), lambdaArg);
} else if (const auto maybeJsonValue = node.Maybe<TCoJsonValue>()) {
@@ -167,20 +167,20 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
}
if (const auto op = node.Maybe<TCoUnaryArithmetic>()) {
- return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg) && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn());
+ return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg, allowOlapApply) && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn(), allowOlapApply);
} else if (const auto op = node.Maybe<TCoBinaryArithmetic>()) {
- return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg)
- && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn()) && !op.Cast().Maybe<TCoAggrAdd>();
+ return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg, allowOlapApply) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg, allowOlapApply)
+ && IsGoodTypeForArithmeticPushdown(*op.Cast().Ref().GetTypeAnn(), allowOlapApply) && !op.Cast().Maybe<TCoAggrAdd>();
}
- if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
+ if (allowOlapApply) {
return AbstractTreeCanBePushed(node, lambdaArg);
}
return false;
}
-bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo) {
+bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo, bool allowOlapApply) {
const auto& rawOne = RemoveOptionality(typeOne);
const auto& rawTwo = RemoveOptionality(typeTwo);
if (IsSameAnnotation(rawOne, rawTwo))
@@ -202,21 +202,21 @@ bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTy
if (size != itemsTwo.size())
return false;
for (auto i = 0U; i < size; ++i) {
- if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i])) {
+ if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i], allowOlapApply)) {
return false;
}
}
return true;
}
case ETypeAnnotationKind::Data:
- return IsGoodTypeForComparsionPushdown(typeOne) && IsGoodTypeForComparsionPushdown(typeTwo);
+ return IsGoodTypeForComparsionPushdown(typeOne, allowOlapApply) && IsGoodTypeForComparsionPushdown(typeTwo, allowOlapApply);
default:
break;
}
return false;
}
-bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input) {
+bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input, bool allowOlapApply) {
const auto* inputType = input.Ref().GetTypeAnn();
switch (inputType->GetKind()) {
case ETypeAnnotationKind::Flow:
@@ -239,7 +239,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
return false;
}
- if (!IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn())) {
+ if (!IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn(), allowOlapApply)) {
return false;
}
@@ -248,7 +248,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
for (size_t i = 0; i < leftList.size(); ++i) {
- if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg)) {
+ if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, allowOlapApply) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, allowOlapApply)) {
return false;
}
}
@@ -256,11 +256,11 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
return true;
}
-bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
- return IsSupportedPredicate(compare) && CheckComparisonParametersForPushdown(compare, lambdaArg, lambdaBody);
+bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
+ return IsSupportedPredicate(compare) && CheckComparisonParametersForPushdown(compare, lambdaArg, lambdaBody, allowOlapApply);
}
-bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg) {
+bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg, bool allowOlapApply) {
/*
* There are three ways of comparison in following format:
*
@@ -281,7 +281,7 @@ bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg)
YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
for (size_t i = 0; i < leftList.size(); ++i) {
- if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg)) {
+ if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, allowOlapApply) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, allowOlapApply)) {
return false;
}
}
@@ -315,16 +315,16 @@ bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprNode* lam
return true;
}
-bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
+bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
if (!coalesce.Value().Maybe<TCoBool>()) {
return false;
}
const auto predicate = coalesce.Predicate();
if (const auto maybeCompare = predicate.Maybe<TCoCompare>()) {
- return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody);
+ return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, allowOlapApply);
} else if (const auto maybeFlatmap = predicate.Maybe<TCoFlatMap>()) {
- return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg);
+ return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg, allowOlapApply);
} else if (const auto maybeJsonExists = predicate.Maybe<TCoJsonExists>()) {
return JsonExistsCanBePushed(maybeJsonExists.Cast(), lambdaArg);
}
@@ -336,47 +336,42 @@ bool ExistsCanBePushed(const TCoExists& exists, const TExprNode* lambdaArg) {
return IsMemberColumn(exists.Optional(), lambdaArg);
}
-void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
+void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
predicateTree.Children.reserve(opNode.ChildrenSize());
predicateTree.CanBePushed = true;
for (const auto& childNodePtr: opNode.Children()) {
TOLAPPredicateNode child;
child.ExprNode = childNodePtr;
if (const auto maybeCtor = TMaybeNode<TCoDataCtor>(child.ExprNode))
- child.CanBePushed = IsSupportedDataType(maybeCtor.Cast());
+ child.CanBePushed = IsSupportedDataType(maybeCtor.Cast(), allowOlapApply);
else
- CollectPredicates(TExprBase(child.ExprNode), child, lambdaArg, lambdaBody);
+ CollectPredicates(TExprBase(child.ExprNode), child, lambdaArg, lambdaBody, allowOlapApply);
predicateTree.Children.emplace_back(child);
predicateTree.CanBePushed &= child.CanBePushed;
}
}
-}
-
-void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) {
- if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
- if (predicate.Maybe<TCoIf>() || predicate.Maybe<TCoJust>() || predicate.Maybe<TCoCoalesce>()) {
- return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody);
- }
- }
+} // namespace
+void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply) {
if (predicate.Maybe<TCoNot>() || predicate.Maybe<TCoAnd>() || predicate.Maybe<TCoOr>() || predicate.Maybe<TCoXor>()) {
- return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody);
+ return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, allowOlapApply);
} else if (const auto maybeCoalesce = predicate.Maybe<TCoCoalesce>()) {
- predicateTree.CanBePushed = CoalesceCanBePushed(maybeCoalesce.Cast(), lambdaArg, lambdaBody);
+ predicateTree.CanBePushed = CoalesceCanBePushed(maybeCoalesce.Cast(), lambdaArg, lambdaBody, allowOlapApply);
} else if (const auto maybeCompare = predicate.Maybe<TCoCompare>()) {
- predicateTree.CanBePushed = CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody);
+ predicateTree.CanBePushed = CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, allowOlapApply);
} else if (const auto maybeExists = predicate.Maybe<TCoExists>()) {
predicateTree.CanBePushed = ExistsCanBePushed(maybeExists.Cast(), lambdaArg);
} else if (const auto maybeJsonExists = predicate.Maybe<TCoJsonExists>()) {
predicateTree.CanBePushed = JsonExistsCanBePushed(maybeJsonExists.Cast(), lambdaArg);
- } else {
- if constexpr (NKikimr::NSsa::RuntimeVersion >= 5U) {
- predicateTree.CanBePushed = AbstractTreeCanBePushed(predicate, lambdaArg);
- } else {
- predicateTree.CanBePushed = false;
+ }
+ if (predicateTree.CanBePushed) {
+ return;
+ } else if (allowOlapApply){
+ if (predicate.Maybe<TCoIf>() || predicate.Maybe<TCoJust>() || predicate.Maybe<TCoCoalesce>()) {
+ return CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, allowOlapApply);
}
+ predicateTree.CanBePushed = AbstractTreeCanBePushed(predicate, lambdaArg);
}
}
-
-}
+} //namespace NKikimr::NKqp::NOpt
diff --git a/ydb/core/kqp/opt/physical/predicate_collector.h b/ydb/core/kqp/opt/physical/predicate_collector.h
index d6663c9d6a..f65ac2ebdd 100644
--- a/ydb/core/kqp/opt/physical/predicate_collector.h
+++ b/ydb/core/kqp/opt/physical/predicate_collector.h
@@ -15,6 +15,6 @@ struct TOLAPPredicateNode {
}
};
-void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody);
+void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody, bool allowOlapApply);
}
diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp
index a6910105f6..b842b9932a 100644
--- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp
+++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp
@@ -1189,26 +1189,19 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsonval, "$.col1") = "val1" AND id = 1;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply");
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
.SetExpectedReply(R"([[1;["val1"];#]])");
-#endif
TestTableWithNulls({testCase});
}
+
Y_UNIT_TEST(Json_GetValue_Minus) {
TAggregationTestCase testCase;
testCase.SetQuery(R"(
SELECT id, JSON_VALUE(jsonval, "$.'col-abc'"), JSON_VALUE(jsondoc, "$.'col-abc'") FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsonval, "$.'col-abc'") = "val-abc" AND id = 1;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
.SetExpectedReply(R"([[1;["val-abc"];#]])");
TestTableWithNulls({testCase});
@@ -1220,11 +1213,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
SELECT id, JSON_VALUE(jsonval, "$.col1" RETURNING String), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsonval, "$.col1" RETURNING String) = "val1" AND id = 1;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
.SetExpectedReply(R"([[1;["val1"];#]])");
TestTableWithNulls({ testCase });
@@ -1236,11 +1225,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 1;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
.SetExpectedReply(R"([[1;[16];#]])");
TestTableWithNulls({ testCase });
@@ -1252,11 +1237,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsondoc, "$.col1") = "val1" AND id = 6;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
.SetExpectedReply(R"([[6;#;["val1"]]])");
TestTableWithNulls({ testCase });
@@ -1268,11 +1249,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1" RETURNING String) FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsondoc, "$.col1" RETURNING String) = "val1" AND id = 6;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
.SetExpectedReply(R"([[6;#;["val1"]]])");
TestTableWithNulls({ testCase });
@@ -1284,11 +1261,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int"), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls`
WHERE JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 6;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonValue")
-#endif
.SetExpectedReply(R"([[6;#;[16]]])");
TestTableWithNulls({ testCase });
@@ -1301,11 +1274,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
WHERE
JSON_EXISTS(jsonval, "$.col1") AND level = 1;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonExists")
-#endif
.SetExpectedReply(R"([[1;[%true];#]])");
TestTableWithNulls({ testCase });
@@ -1318,11 +1287,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
WHERE
JSON_EXISTS(jsondoc, "$.col1") AND id = 6;
)")
-#if SSA_RUNTIME_VERSION >= 5U
- .AddExpectedPlanOptions("KqpOlapApply")
-#else
.AddExpectedPlanOptions("KqpOlapJsonExists")
-#endif
.SetExpectedReply(R"([[6;#;[%true]]])");
TestTableWithNulls({ testCase });
@@ -1343,6 +1308,26 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) {
TestTableWithNulls({ testCase });
}
+ Y_UNIT_TEST(MixedJsonAndOlapApply) {
+ TAggregationTestCase testCase;
+ //(R"({"col1": "val1", "col-abc": "val-abc", "obj": {"obj_col2_int": 16}})"
+ testCase.SetQuery(R"(
+ SELECT id, JSON_VALUE(jsonval, "$.\"col-abc\"") FROM `/Root/tableWithNulls`
+ WHERE id = 1
+ AND JSON_VALUE(jsonval, "$.col1") = "val1"
+ AND JSON_VALUE(jsonval, "$.\"col-abc\"") ilike "%A%b%"
+ AND JSON_EXISTS(jsonval, "$.obj.obj_col2_int")
+
+ )")
+ .AddExpectedPlanOptions("KqpOlapJsonValue")
+ .AddExpectedPlanOptions("KqpOlapJsonExists")
+ .AddExpectedPlanOptions("KqpOlapApply")
+ .SetExpectedReply(R"([[1;["val-abc"]]])")
+ ;
+
+ TestTableWithNulls({testCase});
+ }
+
Y_UNIT_TEST(BlockGenericWithDistinct) {
TAggregationTestCase testCase;
testCase.SetQuery(R"(
diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
index 6116dbed18..61f8b0f38a 100644
--- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
+++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
@@ -1452,9 +1452,15 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
auto result = CollectStreamResult(it);
auto ast = result.QueryStats->Getquery_ast();
UNIT_ASSERT_C(ast.find(R"(('eq '"resource_id")") != std::string::npos,
- TStringBuilder() << "Predicate not pushed down. Query: " << query);
- UNIT_ASSERT_C(ast.find(R"(('gt '"level")") == std::string::npos,
- TStringBuilder() << "Predicate pushed down. Query: " << query);
+ TStringBuilder() << "Subpredicate is not pushed down. Query: " << query);
+ UNIT_ASSERT_C(ast.find(R"(('gt '"level")") != std::string::npos,
+ TStringBuilder() << "Subpredicate is not pushed down. Query: " << query);
+ //This check is based on an assumpltion, that for pushed down predicates column names are preserved in AST
+ //But for non-pushed down predicates column names are (usually) replaced with a label, started with $. It' not a rule, but a heuristic
+ //So this check may require a correction when some ast optimization rules are changed
+ UNIT_ASSERT_C(ast.find(R"((Unwrap (/ $)") != std::string::npos,
+ TStringBuilder() << "Unsafe subpredicate is pushed down. Query: " << query);
+
UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos,
TStringBuilder() << "NarrowMap was removed. Query: " << query);
}
diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40
index a87ce4a6ce..600be0f4a4 100644
--- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40
+++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40
@@ -107,7 +107,7 @@
}
],
"Name": "Filter",
- "Predicate": "IsRefresh == 0 AND TraficSourceID == -1 OR TraficSourceID == 6 AND RefererHash == 3594120000172545465",
+ "Predicate": "IsRefresh == 0 AND RefererHash == 3594120000172545465 AND TraficSourceID == -1 OR TraficSourceID == 6",
"Pushdown": "True"
},
{
@@ -169,7 +169,7 @@
"Id": 108
},
"Constant": {
- "Int32": -1
+ "Int64": 3594120000172545465
}
}
},
@@ -181,7 +181,7 @@
"Function": {
"Arguments": [
{
- "Id": 38
+ "Id": 103
},
{
"Id": 108
@@ -199,7 +199,7 @@
"Id": 110
},
"Constant": {
- "Int32": 6
+ "Int32": -1
}
}
},
@@ -228,28 +228,28 @@
"Column": {
"Id": 112
},
+ "Constant": {
+ "Int32": 6
+ }
+ }
+ },
+ {
+ "Assign": {
+ "Column": {
+ "Id": 113
+ },
"Function": {
"Arguments": [
{
- "Id": 109
+ "Id": 38
},
{
- "Id": 111
+ "Id": 112
}
],
"FunctionType": 2,
"KernelIdx": 3,
- "YqlOperationId": 1
- }
- }
- },
- {
- "Assign": {
- "Column": {
- "Id": 113
- },
- "Constant": {
- "Int64": 3594120000172545465
+ "YqlOperationId": 11
}
}
},
@@ -261,7 +261,7 @@
"Function": {
"Arguments": [
{
- "Id": 103
+ "Id": 111
},
{
"Id": 113
@@ -269,7 +269,7 @@
],
"FunctionType": 2,
"KernelIdx": 4,
- "YqlOperationId": 11
+ "YqlOperationId": 1
}
}
},
@@ -281,7 +281,7 @@
"Function": {
"Arguments": [
{
- "Id": 112
+ "Id": 109
},
{
"Id": 114