diff options
author | Tony-Romanov <150126326+Tony-Romanov@users.noreply.github.com> | 2024-01-23 19:10:55 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-23 19:10:55 +0100 |
commit | de0a267b1e4f45b380c51818d82e0a2d98267a71 (patch) | |
tree | 94d23529859967dd67977dc45982c4bda8242ca0 | |
parent | bc9a0eeb1457f6fd82c25eaab5b9878598509909 (diff) | |
download | ydb-de0a267b1e4f45b380c51818d82e0a2d98267a71.tar.gz |
Check types for pushdown on OLAP level. (#1144)
* Check types for pushdown on OLAP level.
* Update test.
-rw-r--r-- | ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp | 119 | ||||
-rw-r--r-- | ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 47 | ||||
-rw-r--r-- | ydb/core/kqp/ut/query/kqp_explain_ut.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/providers/common/pushdown/collection.cpp | 13 | ||||
-rw-r--r-- | ydb/library/yql/providers/common/pushdown/settings.h | 1 |
5 files changed, 135 insertions, 47 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp index 1b5bec6be4..f65324c621 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp @@ -33,7 +33,7 @@ struct TPushdownSettings : public NPushdown::TSettings { Enable(EFlag::LikeOperator, NSsa::RuntimeVersion >= 2U); Enable(EFlag::LikeOperatorOnlyForUtf8, NSsa::RuntimeVersion < 3U); Enable(EFlag::JsonQueryOperators | EFlag::JsonExistsOperator, NSsa::RuntimeVersion >= 3U); - Enable(EFlag::ArithmeticalExpressions | EFlag::UnaryOperators, NSsa::RuntimeVersion >= 4U); + Enable(EFlag::ArithmeticalExpressions | EFlag::UnaryOperators | EFlag::DoNotCheckCompareArgumentsTypes, NSsa::RuntimeVersion >= 4U); Enable(EFlag::LogicalXorOperator | EFlag::ParameterExpression | EFlag::CastExpression @@ -61,7 +61,7 @@ struct TFilterOpsLevels { } } - bool IsValid() { + bool IsValid() const { return FirstLevelOps.IsValid() || SecondLevelOps.IsValid(); } @@ -152,10 +152,6 @@ TMaybeNode<TExprBase> YqlCoalescePushdown(const TCoCoalesce& coalesce, TExprCont return NullNode; } -bool IsGoodTypeForPushdown(const TTypeAnnotationNode& type) { - return NUdf::EDataTypeFeatures::IntegralType & NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features; -} - std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, TExprContext& ctx, TPositionHandle pos) { std::vector<TExprBase> out; @@ -203,27 +199,25 @@ std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, TExprConte if constexpr (NKikimr::NSsa::RuntimeVersion >= 4U) { if (const auto maybeArithmetic = node.Maybe<TCoBinaryArithmetic>()) { - if (const auto arithmetic = maybeArithmetic.Cast(); IsGoodTypeForPushdown(*arithmetic.Ref().GetTypeAnn()) && !arithmetic.Maybe<TCoAggrAdd>()) { - if (const auto params = ExtractBinaryFunctionParameters(arithmetic, ctx, pos)) { - return Build<TKqpOlapFilterBinaryOp>(ctx, pos) - .Operator().Value(arithmetic.Ref().Content(), TNodeFlags::Default).Build() - .Left(params->first) - .Right(params->second) - .Done(); - } + const auto arithmetic = maybeArithmetic.Cast(); + if (const auto params = ExtractBinaryFunctionParameters(arithmetic, ctx, pos)) { + return Build<TKqpOlapFilterBinaryOp>(ctx, pos) + .Operator().Value(arithmetic.Ref().Content(), TNodeFlags::Default).Build() + .Left(params->first) + .Right(params->second) + .Done(); } } if (const auto maybeArithmetic = node.Maybe<TCoUnaryArithmetic>()) { - if (const auto arithmetic = maybeArithmetic.Cast(); IsGoodTypeForPushdown(*arithmetic.Ref().GetTypeAnn())) { - if (const auto params = ConvertComparisonNode(arithmetic.Arg(), ctx, pos); 1U == params.size()) { - TString oper(arithmetic.Ref().Content()); - YQL_ENSURE(oper.to_lower()); - return Build<TKqpOlapFilterUnaryOp>(ctx, pos) - .Operator().Value(oper, TNodeFlags::Default).Build() - .Arg(params.front()) - .Done(); - } + const auto arithmetic = maybeArithmetic.Cast(); + if (const auto params = ConvertComparisonNode(arithmetic.Arg(), ctx, pos); 1U == params.size()) { + TString oper(arithmetic.Ref().Content()); + YQL_ENSURE(oper.to_lower()); + return Build<TKqpOlapFilterUnaryOp>(ctx, pos) + .Operator().Value(oper, TNodeFlags::Default).Build() + .Arg(params.front()) + .Done(); } } @@ -653,6 +647,80 @@ void SplitForPartialPushdown(const NPushdown::TPredicateNode& predicateTree, NPu remainingPredicates.SetPredicates(remaining, ctx, pos); } +bool IsGoodTypeForPushdown(const TTypeAnnotationNode& type) { + return NUdf::EDataTypeFeatures::IntegralType & NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features; +} + +bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo) { + const auto& rawOne = RemoveOptionality(typeOne); + const auto& rawTwo = RemoveOptionality(typeTwo); + if (IsSameAnnotation(rawOne, rawTwo)) + return true; + + const auto kindOne = rawOne.GetKind(); + const auto kindTwo = rawTwo.GetKind(); + if (ETypeAnnotationKind::Null == kindOne || ETypeAnnotationKind::Null == kindTwo) + return true; + + if (kindTwo != kindOne) + return false; + + switch (kindOne) { + case ETypeAnnotationKind::Tuple: { + const auto& itemsOne = rawOne.Cast<TTupleExprType>()->GetItems(); + const auto& itemsTwo = rawTwo.Cast<TTupleExprType>()->GetItems(); + const auto size = itemsOne.size(); + if (size != itemsTwo.size()) + return false; + for (auto i = 0U; i < size; ++i) { + if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i])) { + return false; + } + } + return true; + } + case ETypeAnnotationKind::Data: { + const auto fOne = NUdf::GetDataTypeInfo(rawOne.Cast<TDataExprType>()->GetSlot()).Features; + const auto fTwo = NUdf::GetDataTypeInfo(rawTwo.Cast<TDataExprType>()->GetSlot()).Features; + return ((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fOne) && (NUdf::EDataTypeFeatures::CanCompare & fOne) + && ((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fTwo) && (NUdf::EDataTypeFeatures::CanCompare & fTwo); + } + default: break; + } + return false; +} + +bool IsGoodNodeForPushdown(const TExprBase& node) { + if (const auto maybeCompare = node.Maybe<TCoCompare>()) { + const auto compare = maybeCompare.Cast(); + return IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn()) + && IsGoodNodeForPushdown(compare.Left()) && IsGoodNodeForPushdown(compare.Right()); + } else if (const auto maybeUnaryOp = node.Maybe<TCoUnaryArithmetic>()) { + return IsGoodTypeForPushdown(*node.Ref().GetTypeAnn()) && IsGoodNodeForPushdown(maybeUnaryOp.Cast().Arg()); + } else if (const auto maybeBinaryOp = node.Maybe<TCoBinaryArithmetic>()) { + const auto binaryOp = maybeBinaryOp.Cast(); + return IsGoodTypeForPushdown(*binaryOp.Ref().GetTypeAnn()) && !binaryOp.Maybe<TCoAggrAdd>() + && IsGoodNodeForPushdown(binaryOp.Left()) && IsGoodNodeForPushdown(binaryOp.Right()); + } else if (const auto maybeCoalesce = node.Maybe<TCoCoalesce>()) { + const auto coalesce = maybeCoalesce.Cast(); + return IsGoodNodeForPushdown(coalesce.Predicate()) && IsGoodNodeForPushdown(coalesce.Value()); + } + + return true; +} + +void UpdatePushableFlagWithOlapSpecific(NPushdown::TPredicateNode& tree) { + if constexpr (NSsa::RuntimeVersion < 4U) + return; + + std::for_each(tree.Children.begin(), tree.Children.end(), std::bind(&UpdatePushableFlagWithOlapSpecific, std::placeholders::_1)); + tree.CanBePushed = tree.CanBePushed && std::all_of(tree.Children.cbegin(), tree.Children.cend(), [](const NPushdown::TPredicateNode& node) { return node.CanBePushed; }); + + if (tree.CanBePushed && NPushdown::EBoolOp::Undefined == tree.Op) { + tree.CanBePushed = IsGoodNodeForPushdown(tree.ExprNode.Cast()); + } +} + } // anonymous namespace end TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -685,10 +753,11 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz return node; } - auto optionalIf = maybeOptionalIf.Cast(); + const auto optionalIf = maybeOptionalIf.Cast(); NPushdown::TPredicateNode predicateTree(optionalIf.Predicate()); CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg, read.Process().Body(), TPushdownSettings()); YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid"); + UpdatePushableFlagWithOlapSpecific(predicateTree); NPushdown::TPredicateNode predicatesToPush; NPushdown::TPredicateNode remainingPredicates; @@ -700,7 +769,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz YQL_ENSURE(predicatesToPush.IsValid(), "Predicates to push is invalid"); YQL_ENSURE(remainingPredicates.IsValid(), "Remaining predicates is invalid"); - auto pushedFilters = PredicatePushdown(predicatesToPush.ExprNode.Cast(), ctx, node.Pos()); + const auto pushedFilters = PredicatePushdown(predicatesToPush.ExprNode.Cast(), ctx, node.Pos()); YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!"); TMaybeNode<TExprBase> olapFilter; diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 66781c6dfa..adff2eb29e 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1595,12 +1595,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`level` % 3 != 1)", R"(-`level` < -2)", R"(Abs(`level` - 3) >= 1)", - R"(LENGTH(`message`) > 1037U)", - R"(LENGTH(`uid`) > 1U OR `resource_id` = "10001")", - R"((LENGTH(`uid`) > 2U AND `resource_id` = "10001") OR `resource_id` = "10002")", - R"((LENGTH(`uid`) > 3U OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))", - R"(NOT(LENGTH(`uid`) > 0U AND `resource_id` = "10001"))", - R"(NOT(LENGTH(`uid`) > 0U OR `resource_id` = "10001"))", + R"(LENGTH(`message`) > 1037)", + R"(LENGTH(`uid`) > 1 OR `resource_id` = "10001")", + R"((LENGTH(`uid`) > 2 AND `resource_id` = "10001") OR `resource_id` = "10002")", + R"((LENGTH(`uid`) > 3 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))", + R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))", + R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))", R"(`level` IS NULL OR `message` IS NULL)", R"(`level` IS NOT NULL AND `message` IS NULL)", R"(`level` IS NULL AND `message` IS NOT NULL)", @@ -1667,14 +1667,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`level` >= CAST("2" As Uint32))", R"(`level` = NULL)", R"(`level` > NULL)", - R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")", - R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")", - R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))", - R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))", - // Not strict function in the beginning causes to disable pushdown - R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")", - // We can handle this case in future - R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))", R"(`level` * 3.14 > 4)", #if SSA_RUNTIME_VERSION < 2U R"(`uid` LIKE "%30000%")", @@ -1683,6 +1675,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`uid` LIKE "uid%001")", #endif #if SSA_RUNTIME_VERSION < 4U + R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")", + R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")", + R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))", + R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))", + R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")", + R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))", R"(`level` + 2 < 5)", R"(`level` - 2 >= 1)", R"(`level` * 3 > 4)", @@ -4705,6 +4703,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { ); } */ + Y_UNIT_TEST(PredicatePushdownCastErrors) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -4717,6 +4716,23 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); +#if SSA_RUNTIME_VERSION >= 4U + const std::set<std::string> numerics = {"Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Float", "Double"}; + const std::map<std::string, std::set<std::string>> exceptions = { + {"Int8", numerics}, + {"Int16", numerics}, + {"Int32", numerics}, + {"Int64", numerics}, + {"UInt8", numerics}, + {"UInt16", numerics}, + {"UInt32", numerics}, + {"UInt64", numerics}, + {"Float", numerics}, + {"Double", numerics}, + {"String", {"Utf8"}}, + {"Utf8", {"String"}}, + }; +#else std::map<std::string, std::set<std::string>> exceptions = { {"Int8", {"Int16", "Int32"}}, {"Int16", {"Int8", "Int32"}}, @@ -4726,9 +4742,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { {"UInt32", {"UInt8", "UInt16"}}, {"String", {"Utf8"}}, {"Utf8", {"String", "Json", "Yson"}}, - {"Json", {"Utf8", "Yson"}}, - {"Yson", {"Utf8", "Json"}}, }; +#endif std::vector<std::string> allTypes = { //"Bool", diff --git a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp index f469f396e4..0a101ca689 100644 --- a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp @@ -855,7 +855,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) { NJson::ReadJsonTree(*streamRes.PlanJson, &plan, true); UNIT_ASSERT(ValidatePlanNodeIds(plan)); - auto readNode = FindPlanNodeByKv(plan, "Node Type", "Filter-TableFullScan"); + auto readNode = FindPlanNodeByKv(plan, "Node Type", "TableFullScan"); UNIT_ASSERT(readNode.IsDefined()); auto& operators = readNode.GetMapSafe().at("Operators").GetArraySafe(); diff --git a/ydb/library/yql/providers/common/pushdown/collection.cpp b/ydb/library/yql/providers/common/pushdown/collection.cpp index c2bff37aa1..9251530220 100644 --- a/ydb/library/yql/providers/common/pushdown/collection.cpp +++ b/ydb/library/yql/providers/common/pushdown/collection.cpp @@ -382,18 +382,21 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr return false; } - bool equality = compare.Maybe<TCoCmpEqual>() || compare.Maybe<TCoCmpNotEqual>(); - auto leftList = GetComparisonNodes(compare.Left()); - auto rightList = GetComparisonNodes(compare.Right()); + const auto leftList = GetComparisonNodes(compare.Left()); + const auto rightList = GetComparisonNodes(compare.Right()); YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!"); for (size_t i = 0; i < leftList.size(); ++i) { if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, settings)) { return false; } - if (!IsComparableTypes(leftList[i], rightList[i], equality, inputType, settings)) { - return false; + + if (!settings.IsEnabled(TSettings::EFeatureFlag::DoNotCheckCompareArgumentsTypes)) { + if (!IsComparableTypes(leftList[i], rightList[i], compare.Maybe<TCoCmpEqual>() || compare.Maybe<TCoCmpNotEqual>(), inputType, settings)) { + return false; + } } + if (IsLikeOperator(compare) && settings.IsEnabled(TSettings::EFeatureFlag::LikeOperatorOnlyForUtf8) && !IsSupportedLikeForUtf8(leftList[i], rightList[i])) { // (KQP OLAP) If SSA_RUNTIME_VERSION == 2 Column Shard doesn't have LIKE kernel for binary strings return false; diff --git a/ydb/library/yql/providers/common/pushdown/settings.h b/ydb/library/yql/providers/common/pushdown/settings.h index caa8258ed1..10c2b7822c 100644 --- a/ydb/library/yql/providers/common/pushdown/settings.h +++ b/ydb/library/yql/providers/common/pushdown/settings.h @@ -23,6 +23,7 @@ struct TSettings { DyNumberType = 1 << 13, ImplicitConversionToInt64 = 1 << 14, // Allow implicit conversions to 64-bits integers from other types of integers UnaryOperators = 1 << 15, // -, Abs, Size + DoNotCheckCompareArgumentsTypes = 1 << 16 }; explicit TSettings(NLog::EComponent logComponent) |