aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony-Romanov <150126326+Tony-Romanov@users.noreply.github.com>2024-01-23 19:10:55 +0100
committerGitHub <noreply@github.com>2024-01-23 19:10:55 +0100
commitde0a267b1e4f45b380c51818d82e0a2d98267a71 (patch)
tree94d23529859967dd67977dc45982c4bda8242ca0
parentbc9a0eeb1457f6fd82c25eaab5b9878598509909 (diff)
downloadydb-de0a267b1e4f45b380c51818d82e0a2d98267a71.tar.gz
Check types for pushdown on OLAP level. (#1144)
* Check types for pushdown on OLAP level. * Update test.
-rw-r--r--ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp119
-rw-r--r--ydb/core/kqp/ut/olap/kqp_olap_ut.cpp47
-rw-r--r--ydb/core/kqp/ut/query/kqp_explain_ut.cpp2
-rw-r--r--ydb/library/yql/providers/common/pushdown/collection.cpp13
-rw-r--r--ydb/library/yql/providers/common/pushdown/settings.h1
5 files changed, 135 insertions, 47 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
index 1b5bec6be4..f65324c621 100644
--- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
+++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
@@ -33,7 +33,7 @@ struct TPushdownSettings : public NPushdown::TSettings {
Enable(EFlag::LikeOperator, NSsa::RuntimeVersion >= 2U);
Enable(EFlag::LikeOperatorOnlyForUtf8, NSsa::RuntimeVersion < 3U);
Enable(EFlag::JsonQueryOperators | EFlag::JsonExistsOperator, NSsa::RuntimeVersion >= 3U);
- Enable(EFlag::ArithmeticalExpressions | EFlag::UnaryOperators, NSsa::RuntimeVersion >= 4U);
+ Enable(EFlag::ArithmeticalExpressions | EFlag::UnaryOperators | EFlag::DoNotCheckCompareArgumentsTypes, NSsa::RuntimeVersion >= 4U);
Enable(EFlag::LogicalXorOperator
| EFlag::ParameterExpression
| EFlag::CastExpression
@@ -61,7 +61,7 @@ struct TFilterOpsLevels {
}
}
- bool IsValid() {
+ bool IsValid() const {
return FirstLevelOps.IsValid() || SecondLevelOps.IsValid();
}
@@ -152,10 +152,6 @@ TMaybeNode<TExprBase> YqlCoalescePushdown(const TCoCoalesce& coalesce, TExprCont
return NullNode;
}
-bool IsGoodTypeForPushdown(const TTypeAnnotationNode& type) {
- return NUdf::EDataTypeFeatures::IntegralType & NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features;
-}
-
std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, TExprContext& ctx, TPositionHandle pos)
{
std::vector<TExprBase> out;
@@ -203,27 +199,25 @@ std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, TExprConte
if constexpr (NKikimr::NSsa::RuntimeVersion >= 4U) {
if (const auto maybeArithmetic = node.Maybe<TCoBinaryArithmetic>()) {
- if (const auto arithmetic = maybeArithmetic.Cast(); IsGoodTypeForPushdown(*arithmetic.Ref().GetTypeAnn()) && !arithmetic.Maybe<TCoAggrAdd>()) {
- if (const auto params = ExtractBinaryFunctionParameters(arithmetic, ctx, pos)) {
- return Build<TKqpOlapFilterBinaryOp>(ctx, pos)
- .Operator().Value(arithmetic.Ref().Content(), TNodeFlags::Default).Build()
- .Left(params->first)
- .Right(params->second)
- .Done();
- }
+ const auto arithmetic = maybeArithmetic.Cast();
+ if (const auto params = ExtractBinaryFunctionParameters(arithmetic, ctx, pos)) {
+ return Build<TKqpOlapFilterBinaryOp>(ctx, pos)
+ .Operator().Value(arithmetic.Ref().Content(), TNodeFlags::Default).Build()
+ .Left(params->first)
+ .Right(params->second)
+ .Done();
}
}
if (const auto maybeArithmetic = node.Maybe<TCoUnaryArithmetic>()) {
- if (const auto arithmetic = maybeArithmetic.Cast(); IsGoodTypeForPushdown(*arithmetic.Ref().GetTypeAnn())) {
- if (const auto params = ConvertComparisonNode(arithmetic.Arg(), ctx, pos); 1U == params.size()) {
- TString oper(arithmetic.Ref().Content());
- YQL_ENSURE(oper.to_lower());
- return Build<TKqpOlapFilterUnaryOp>(ctx, pos)
- .Operator().Value(oper, TNodeFlags::Default).Build()
- .Arg(params.front())
- .Done();
- }
+ const auto arithmetic = maybeArithmetic.Cast();
+ if (const auto params = ConvertComparisonNode(arithmetic.Arg(), ctx, pos); 1U == params.size()) {
+ TString oper(arithmetic.Ref().Content());
+ YQL_ENSURE(oper.to_lower());
+ return Build<TKqpOlapFilterUnaryOp>(ctx, pos)
+ .Operator().Value(oper, TNodeFlags::Default).Build()
+ .Arg(params.front())
+ .Done();
}
}
@@ -653,6 +647,80 @@ void SplitForPartialPushdown(const NPushdown::TPredicateNode& predicateTree, NPu
remainingPredicates.SetPredicates(remaining, ctx, pos);
}
+bool IsGoodTypeForPushdown(const TTypeAnnotationNode& type) { // True when the type, with optionality removed, is a data type whose feature mask includes IntegralType.
+ return NUdf::EDataTypeFeatures::IntegralType & NUdf::GetDataTypeInfo(RemoveOptionality(type).Cast<TDataExprType>()->GetSlot()).Features; // NOTE(review): the Cast<TDataExprType>() presumes the unwrapped type is a data type - confirm every caller guarantees this.
+}
+
+bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo) { // Decides whether the two operand types of a comparison are an acceptable pair for pushdown; returns true if they are.
+ const auto& rawOne = RemoveOptionality(typeOne); // Optionality is stripped from both sides before any check.
+ const auto& rawTwo = RemoveOptionality(typeTwo);
+ if (IsSameAnnotation(rawOne, rawTwo)) // Identical annotations (optionality aside) are accepted without inspecting the kind.
+ return true;
+
+ const auto kindOne = rawOne.GetKind();
+ const auto kindTwo = rawTwo.GetKind();
+ if (ETypeAnnotationKind::Null == kindOne || ETypeAnnotationKind::Null == kindTwo) // A Null-kind operand on either side is accepted regardless of the other side.
+ return true;
+
+ if (kindTwo != kindOne) // Any other mix of different kinds is rejected.
+ return false;
+
+ switch (kindOne) {
+ case ETypeAnnotationKind::Tuple: { // Tuples: equal arity, and every positional pair of items must pass this same check recursively.
+ const auto& itemsOne = rawOne.Cast<TTupleExprType>()->GetItems();
+ const auto& itemsTwo = rawTwo.Cast<TTupleExprType>()->GetItems();
+ const auto size = itemsOne.size();
+ if (size != itemsTwo.size())
+ return false;
+ for (auto i = 0U; i < size; ++i) {
+ if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i])) {
+ return false;
+ }
+ }
+ return true;
+ }
+ case ETypeAnnotationKind::Data: { // Data types: each side must carry CanCompare and at least one of NumericType / StringType in its feature mask.
+ const auto fOne = NUdf::GetDataTypeInfo(rawOne.Cast<TDataExprType>()->GetSlot()).Features;
+ const auto fTwo = NUdf::GetDataTypeInfo(rawTwo.Cast<TDataExprType>()->GetSlot()).Features;
+ return ((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fOne) && (NUdf::EDataTypeFeatures::CanCompare & fOne)
+ && ((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fTwo) && (NUdf::EDataTypeFeatures::CanCompare & fTwo); // The two sides are tested independently, so a numeric/string pairing also satisfies this expression.
+ }
+ default: break; // Every other kind (when the annotations were not identical) falls through to the rejection below.
+ }
+ return false;
+}
+
+bool IsGoodNodeForPushdown(const TExprBase& node) { // Recursively checks an expression node: comparisons, unary/binary arithmetic and coalesce get type checks; anything else is accepted.
+ if (const auto maybeCompare = node.Maybe<TCoCompare>()) { // Comparison: operand types must be a good pair, and both operands must pass recursively.
+ const auto compare = maybeCompare.Cast();
+ return IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn())
+ && IsGoodNodeForPushdown(compare.Left()) && IsGoodNodeForPushdown(compare.Right());
+ } else if (const auto maybeUnaryOp = node.Maybe<TCoUnaryArithmetic>()) { // Unary arithmetic: the node's own result type must pass IsGoodTypeForPushdown and the argument must pass recursively.
+ return IsGoodTypeForPushdown(*node.Ref().GetTypeAnn()) && IsGoodNodeForPushdown(maybeUnaryOp.Cast().Arg());
+ } else if (const auto maybeBinaryOp = node.Maybe<TCoBinaryArithmetic>()) { // Binary arithmetic: same result-type check, TCoAggrAdd is explicitly excluded, both operands must pass.
+ const auto binaryOp = maybeBinaryOp.Cast();
+ return IsGoodTypeForPushdown(*binaryOp.Ref().GetTypeAnn()) && !binaryOp.Maybe<TCoAggrAdd>()
+ && IsGoodNodeForPushdown(binaryOp.Left()) && IsGoodNodeForPushdown(binaryOp.Right());
+ } else if (const auto maybeCoalesce = node.Maybe<TCoCoalesce>()) { // Coalesce: no type check of its own; both the predicate and the fallback value must pass.
+ const auto coalesce = maybeCoalesce.Cast();
+ return IsGoodNodeForPushdown(coalesce.Predicate()) && IsGoodNodeForPushdown(coalesce.Value());
+ }
+
+ return true; // Node kinds not listed above are not restricted by this function.
+}
+
+void UpdatePushableFlagWithOlapSpecific(NPushdown::TPredicateNode& tree) { // Walks the predicate tree in place and clears CanBePushed where the checks below fail; it never sets the flag from false to true.
+ if constexpr (NSsa::RuntimeVersion < 4U) // For runtime versions below 4 the tree is left untouched.
+ return;
+
+ std::for_each(tree.Children.begin(), tree.Children.end(), std::bind(&UpdatePushableFlagWithOlapSpecific, std::placeholders::_1)); // Children are processed first so their flags are final before the parent is evaluated.
+ tree.CanBePushed = tree.CanBePushed && std::all_of(tree.Children.cbegin(), tree.Children.cend(), [](const NPushdown::TPredicateNode& node) { return node.CanBePushed; }); // A node stays pushable only if every child is still pushable.
+
+ if (tree.CanBePushed && NPushdown::EBoolOp::Undefined == tree.Op) { // NOTE(review): Op == Undefined presumably marks a non-boolean-operator (leaf) predicate - confirm against TPredicateNode.
+ tree.CanBePushed = IsGoodNodeForPushdown(tree.ExprNode.Cast());
+ }
+}
+
} // anonymous namespace end
TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx,
@@ -685,10 +753,11 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
return node;
}
- auto optionalIf = maybeOptionalIf.Cast();
+ const auto optionalIf = maybeOptionalIf.Cast();
NPushdown::TPredicateNode predicateTree(optionalIf.Predicate());
CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg, read.Process().Body(), TPushdownSettings());
YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid");
+ UpdatePushableFlagWithOlapSpecific(predicateTree);
NPushdown::TPredicateNode predicatesToPush;
NPushdown::TPredicateNode remainingPredicates;
@@ -700,7 +769,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
YQL_ENSURE(predicatesToPush.IsValid(), "Predicates to push is invalid");
YQL_ENSURE(remainingPredicates.IsValid(), "Remaining predicates is invalid");
- auto pushedFilters = PredicatePushdown(predicatesToPush.ExprNode.Cast(), ctx, node.Pos());
+ const auto pushedFilters = PredicatePushdown(predicatesToPush.ExprNode.Cast(), ctx, node.Pos());
YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!");
TMaybeNode<TExprBase> olapFilter;
diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
index 66781c6dfa..adff2eb29e 100644
--- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
+++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
@@ -1595,12 +1595,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
R"(`level` % 3 != 1)",
R"(-`level` < -2)",
R"(Abs(`level` - 3) >= 1)",
- R"(LENGTH(`message`) > 1037U)",
- R"(LENGTH(`uid`) > 1U OR `resource_id` = "10001")",
- R"((LENGTH(`uid`) > 2U AND `resource_id` = "10001") OR `resource_id` = "10002")",
- R"((LENGTH(`uid`) > 3U OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
- R"(NOT(LENGTH(`uid`) > 0U AND `resource_id` = "10001"))",
- R"(NOT(LENGTH(`uid`) > 0U OR `resource_id` = "10001"))",
+ R"(LENGTH(`message`) > 1037)",
+ R"(LENGTH(`uid`) > 1 OR `resource_id` = "10001")",
+ R"((LENGTH(`uid`) > 2 AND `resource_id` = "10001") OR `resource_id` = "10002")",
+ R"((LENGTH(`uid`) > 3 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
+ R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))",
+ R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))",
R"(`level` IS NULL OR `message` IS NULL)",
R"(`level` IS NOT NULL AND `message` IS NULL)",
R"(`level` IS NULL AND `message` IS NOT NULL)",
@@ -1667,14 +1667,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
R"(`level` >= CAST("2" As Uint32))",
R"(`level` = NULL)",
R"(`level` > NULL)",
- R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")",
- R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")",
- R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
- R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))",
- // Not strict function in the beginning causes to disable pushdown
- R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")",
- // We can handle this case in future
- R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))",
R"(`level` * 3.14 > 4)",
#if SSA_RUNTIME_VERSION < 2U
R"(`uid` LIKE "%30000%")",
@@ -1683,6 +1675,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
R"(`uid` LIKE "uid%001")",
#endif
#if SSA_RUNTIME_VERSION < 4U
+ R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")",
+ R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")",
+ R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
+ R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))",
+ R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")",
+ R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))",
R"(`level` + 2 < 5)",
R"(`level` - 2 >= 1)",
R"(`level` * 3 > 4)",
@@ -4705,6 +4703,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
);
}
*/
+
Y_UNIT_TEST(PredicatePushdownCastErrors) {
auto settings = TKikimrSettings()
.SetWithSampleTables(false);
@@ -4717,6 +4716,23 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
auto tableClient = kikimr.GetTableClient();
+#if SSA_RUNTIME_VERSION >= 4U
+ const std::set<std::string> numerics = {"Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Float", "Double"};
+ const std::map<std::string, std::set<std::string>> exceptions = {
+ {"Int8", numerics},
+ {"Int16", numerics},
+ {"Int32", numerics},
+ {"Int64", numerics},
+ {"UInt8", numerics},
+ {"UInt16", numerics},
+ {"UInt32", numerics},
+ {"UInt64", numerics},
+ {"Float", numerics},
+ {"Double", numerics},
+ {"String", {"Utf8"}},
+ {"Utf8", {"String"}},
+ };
+#else
std::map<std::string, std::set<std::string>> exceptions = {
{"Int8", {"Int16", "Int32"}},
{"Int16", {"Int8", "Int32"}},
@@ -4726,9 +4742,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
{"UInt32", {"UInt8", "UInt16"}},
{"String", {"Utf8"}},
{"Utf8", {"String", "Json", "Yson"}},
- {"Json", {"Utf8", "Yson"}},
- {"Yson", {"Utf8", "Json"}},
};
+#endif
std::vector<std::string> allTypes = {
//"Bool",
diff --git a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp
index f469f396e4..0a101ca689 100644
--- a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp
+++ b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp
@@ -855,7 +855,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) {
NJson::ReadJsonTree(*streamRes.PlanJson, &plan, true);
UNIT_ASSERT(ValidatePlanNodeIds(plan));
- auto readNode = FindPlanNodeByKv(plan, "Node Type", "Filter-TableFullScan");
+ auto readNode = FindPlanNodeByKv(plan, "Node Type", "TableFullScan");
UNIT_ASSERT(readNode.IsDefined());
auto& operators = readNode.GetMapSafe().at("Operators").GetArraySafe();
diff --git a/ydb/library/yql/providers/common/pushdown/collection.cpp b/ydb/library/yql/providers/common/pushdown/collection.cpp
index c2bff37aa1..9251530220 100644
--- a/ydb/library/yql/providers/common/pushdown/collection.cpp
+++ b/ydb/library/yql/providers/common/pushdown/collection.cpp
@@ -382,18 +382,21 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
return false;
}
- bool equality = compare.Maybe<TCoCmpEqual>() || compare.Maybe<TCoCmpNotEqual>();
- auto leftList = GetComparisonNodes(compare.Left());
- auto rightList = GetComparisonNodes(compare.Right());
+ const auto leftList = GetComparisonNodes(compare.Left());
+ const auto rightList = GetComparisonNodes(compare.Right());
YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
for (size_t i = 0; i < leftList.size(); ++i) {
if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, settings)) {
return false;
}
- if (!IsComparableTypes(leftList[i], rightList[i], equality, inputType, settings)) {
- return false;
+
+ if (!settings.IsEnabled(TSettings::EFeatureFlag::DoNotCheckCompareArgumentsTypes)) {
+ if (!IsComparableTypes(leftList[i], rightList[i], compare.Maybe<TCoCmpEqual>() || compare.Maybe<TCoCmpNotEqual>(), inputType, settings)) {
+ return false;
+ }
}
+
if (IsLikeOperator(compare) && settings.IsEnabled(TSettings::EFeatureFlag::LikeOperatorOnlyForUtf8) && !IsSupportedLikeForUtf8(leftList[i], rightList[i])) {
// (KQP OLAP) If SSA_RUNTIME_VERSION == 2 Column Shard doesn't have LIKE kernel for binary strings
return false;
diff --git a/ydb/library/yql/providers/common/pushdown/settings.h b/ydb/library/yql/providers/common/pushdown/settings.h
index caa8258ed1..10c2b7822c 100644
--- a/ydb/library/yql/providers/common/pushdown/settings.h
+++ b/ydb/library/yql/providers/common/pushdown/settings.h
@@ -23,6 +23,7 @@ struct TSettings {
DyNumberType = 1 << 13,
ImplicitConversionToInt64 = 1 << 14, // Allow implicit conversions to 64-bits integers from other types of integers
UnaryOperators = 1 << 15, // -, Abs, Size
+ DoNotCheckCompareArgumentsTypes = 1 << 16
};
explicit TSettings(NLog::EComponent logComponent)