aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dmitry Kardymon <kardymon-d@ydb.tech> 2024-11-08 18:03:32 +0300
committer GitHub <noreply@github.com> 2024-11-08 15:03:32 +0000
commit 9fb36d174ef8c97322150f9f01a5e2e182af358c (patch)
tree 80b5528e833bd1025fc8b26797e730decd9a03ee
parent 65dc86e989300e998b03fb414beccebed687d785 (diff)
download ydb-9fb36d174ef8c97322150f9f01a5e2e182af358c.tar.gz
YQ-3824 Shared reading: fix `is not distinct from` with optional field (#11378)
-rw-r--r-- ydb/library/yql/providers/common/pushdown/collection.cpp 20
-rw-r--r-- ydb/library/yql/providers/common/pushdown/settings.h 3
-rw-r--r-- ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp 17
-rw-r--r-- ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp 2
-rw-r--r-- ydb/tests/fq/yds/test_row_dispatcher.py 20
5 files changed, 50 insertions, 12 deletions
diff --git a/ydb/library/yql/providers/common/pushdown/collection.cpp b/ydb/library/yql/providers/common/pushdown/collection.cpp
index c3536ba31d0..d18b8ae2ede 100644
--- a/ydb/library/yql/providers/common/pushdown/collection.cpp
+++ b/ydb/library/yql/providers/common/pushdown/collection.cpp
@@ -456,6 +456,23 @@ bool SqlInCanBePushed(const TCoSqlIn& sqlIn, const TExprNode* lambdaArg, const T
return true;
}
+bool IsDistinctCanBePushed(const TExprBase& predicate, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) { // Pushdown check for a two-operand predicate; CollectPredicates calls it for IsNotDistinctFrom / IsDistinctFrom nodes.
+ if (predicate.Ref().ChildrenSize() != 2 ) { // Anything other than exactly two children is rejected.
+ return false;
+ }
+ auto expr1 = TExprBase(predicate.Ref().Child(0)); // First operand (child 0).
+ auto expr2 = TExprBase(predicate.Ref().Child(1)); // Second operand (child 1).
+ if (!CheckExpressionNodeForPushdown(expr1, lambdaArg, settings) // Each operand must pass the expression check on its own.
+ || !CheckExpressionNodeForPushdown(expr2, lambdaArg, settings)) {
+ return false;
+ }
+ if (!settings.IsEnabled(TSettings::EFeatureFlag::DoNotCheckCompareArgumentsTypes) // The type check below is skipped entirely when this flag is enabled.
+ && !IsComparableTypes(expr1, expr2, false, lambdaBody.Ptr()->GetTypeAnn(), settings)) { // NOTE(review): meaning of the literal `false` argument is not visible in this hunk - confirm it is the intended mode for (NOT) DISTINCT FROM.
+ return false;
+ }
+ return true; // Arity, per-operand and (optional) type checks all passed.
+}
+
bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg, const TSettings& settings) {
/*
* There are three ways of comparison in following format:
@@ -595,6 +612,9 @@ void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree
} else if (settings.IsEnabled(TSettings::EFeatureFlag::InOperator) && predicate.Maybe<TCoSqlIn>()) {
auto sqlIn = predicate.Cast<TCoSqlIn>();
predicateTree.CanBePushed = SqlInCanBePushed(sqlIn, lambdaArg, lambdaBody, settings);
+ } else if (settings.IsEnabled(TSettings::EFeatureFlag::IsDistinctOperator) &&
+ (predicate.Ref().IsCallable({"IsNotDistinctFrom", "IsDistinctFrom"}))) {
+ predicateTree.CanBePushed = IsDistinctCanBePushed(predicate, lambdaArg, lambdaBody, settings);
} else {
predicateTree.CanBePushed = false;
}
diff --git a/ydb/library/yql/providers/common/pushdown/settings.h b/ydb/library/yql/providers/common/pushdown/settings.h
index 1ee6abf8765..df0164b9d5f 100644
--- a/ydb/library/yql/providers/common/pushdown/settings.h
+++ b/ydb/library/yql/providers/common/pushdown/settings.h
@@ -26,7 +26,8 @@ struct TSettings {
DoNotCheckCompareArgumentsTypes = 1 << 16,
TimestampCtor = 1 << 17,
JustPassthroughOperators = 1 << 18, // if + coalesce + just
- InOperator = 1 << 19 // IN()
+ InOperator = 1 << 19, // IN()
+ IsDistinctOperator = 1 << 20 // IS NOT DISTINCT FROM / IS DISTINCT FROM
};
explicit TSettings(NLog::EComponent logComponent)
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp
index e08e09c9a54..7d86b754b73 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp
@@ -193,6 +193,17 @@ namespace NYql {
return true;
}
+ bool SerializeIsNotDistinctFrom(const TExprBase& predicate, TPredicate* predicateProto, const TCoArgument& arg, TStringBuilder& err, bool invert) { // Writes a two-child predicate into predicateProto's comparison; invert=false emits IND, invert=true emits ID. Returns false on failure.
+ if (predicate.Ref().ChildrenSize() != 2) { // Exactly two operands are required.
+ err << "unknown predicate, expected 2, children size " << predicate.Ref().ChildrenSize(); // The actual child count is appended to err for diagnostics.
+ return false;
+ }
+ TPredicate::TComparison* proto = predicateProto->mutable_comparison(); // NOTE(review): obtained before the operands are serialized, so on a later failure predicateProto may already carry a partially filled comparison - confirm callers discard it.
+ proto->set_operation(!invert ? TPredicate::TComparison::IND : TPredicate::TComparison::ID); // Callers pass invert=false for IsNotDistinctFrom and invert=true for IsDistinctFrom.
+ return SerializeExpression(TExprBase(predicate.Ref().Child(0)), proto->mutable_left_value(), arg, err) // Child 0 goes to left_value.
+ && SerializeExpression(TExprBase(predicate.Ref().Child(1)), proto->mutable_right_value(), arg, err); // Child 1 goes to right_value; `&&` short-circuits, so it is only attempted if the left side succeeded.
+ }
+
bool SerializeAnd(const TCoAnd& andExpr, TPredicate* proto, const TCoArgument& arg, TStringBuilder& err, ui64 depth) {
auto* dstProto = proto->mutable_conjunction();
for (const auto& child : andExpr.Ptr()->Children()) {
@@ -251,6 +262,12 @@ namespace NYql {
if (auto sqlIn = predicate.Maybe<TCoSqlIn>()) {
return SerializeSqlIn(sqlIn.Cast(), proto, arg, err);
}
+ if (predicate.Ref().IsCallable("IsNotDistinctFrom")) {
+ return SerializeIsNotDistinctFrom(predicate, proto, arg, err, false);
+ }
+ if (predicate.Ref().IsCallable("IsDistinctFrom")) {
+ return SerializeIsNotDistinctFrom(predicate, proto, arg, err, true);
+ }
if (auto sqlIf = predicate.Maybe<TCoIf>()) {
return SerializeSqlIf(sqlIf.Cast(), proto, arg, err, depth);
}
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp
index 7b3b22d50db..da8fa9923fd 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp
@@ -30,7 +30,7 @@ namespace {
: NPushdown::TSettings(NLog::EComponent::ProviderGeneric)
{
using EFlag = NPushdown::TSettings::EFeatureFlag;
- Enable(EFlag::ExpressionAsPredicate | EFlag::ArithmeticalExpressions | EFlag::ImplicitConversionToInt64 | EFlag::StringTypes | EFlag::LikeOperator | EFlag::DoNotCheckCompareArgumentsTypes | EFlag::InOperator | EFlag::JustPassthroughOperators);
+ Enable(EFlag::ExpressionAsPredicate | EFlag::ArithmeticalExpressions | EFlag::ImplicitConversionToInt64 | EFlag::StringTypes | EFlag::LikeOperator | EFlag::DoNotCheckCompareArgumentsTypes | EFlag::InOperator | EFlag::IsDistinctOperator | EFlag::JustPassthroughOperators);
}
};
diff --git a/ydb/tests/fq/yds/test_row_dispatcher.py b/ydb/tests/fq/yds/test_row_dispatcher.py
index 36e58b57bfe..3e59b894d8f 100644
--- a/ydb/tests/fq/yds/test_row_dispatcher.py
+++ b/ydb/tests/fq/yds/test_row_dispatcher.py
@@ -349,20 +349,20 @@ class TestPqRowDispatcher(TestYdsBase):
self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `data` = \\"hello2\\"')
filter = 'flag'
self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `flag`')
- # filter = ' event IS NOT DISTINCT FROM "event2"'
- # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS NOT DISTINCT FROM \\"event2\\"')
- # filter = ' event IS DISTINCT FROM "event1"'
- # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS DISTINCT FROM \\"event1\\"')
- # filter = ' field1 IS DISTINCT FROM field2'
- # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `field1` IS DISTINCT FROM `field2`')
+ filter = ' event IS NOT DISTINCT FROM "event2"'
+ self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS NOT DISTINCT FROM \\"event2\\"')
+ filter = ' event IS DISTINCT FROM "event1"'
+ self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS DISTINCT FROM \\"event1\\"')
+ filter = ' field1 IS DISTINCT FROM field2'
+ self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `field1` IS DISTINCT FROM `field2`')
filter = 'event IN ("event2")'
self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"event2\\")')
filter = 'event IN ("1", "2", "3", "4", "5", "6", "7", "event2")'
self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"1\\"')
- # filter = ' event IS DISTINCT FROM data AND event IN ("1", "2", "3", "4", "5", "6", "7", "event2")'
- # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM `data` AND `event` IN (\\"1\\"')
- # filter = ' IF(event == "event2", event IS DISTINCT FROM data, FALSE)'
- # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(`event` == "event2", `event` IS DISTINCT FROM `data`, FALSE)')
+ filter = ' event IS DISTINCT FROM data AND event IN ("1", "2", "3", "4", "5", "6", "7", "event2")'
+ self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM `data` AND COALESCE(`event` IN (\\"1\\"')
+ filter = ' IF(event == "event2", event IS DISTINCT FROM data, FALSE)'
+ self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(COALESCE(`event` = \\"event2\\", FALSE), `event` IS DISTINCT FROM `data`, FALSE)')
filter = ' COALESCE(event = "event2", TRUE)'
self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE(`event` = \\"event2\\", TRUE)')
filter = ' COALESCE(event = "event2", data = "hello2", TRUE)'