diff options
author | Dmitry Kardymon <kardymon-d@ydb.tech> | 2024-11-08 18:03:32 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-08 15:03:32 +0000 |
commit | 9fb36d174ef8c97322150f9f01a5e2e182af358c (patch) | |
tree | 80b5528e833bd1025fc8b26797e730decd9a03ee | |
parent | 65dc86e989300e998b03fb414beccebed687d785 (diff) | |
download | ydb-9fb36d174ef8c97322150f9f01a5e2e182af358c.tar.gz |
YQ-3824 Shared reading: fix `is not distinct from` with optional field (#11378)
5 files changed, 50 insertions, 12 deletions
diff --git a/ydb/library/yql/providers/common/pushdown/collection.cpp b/ydb/library/yql/providers/common/pushdown/collection.cpp index c3536ba31d0..d18b8ae2ede 100644 --- a/ydb/library/yql/providers/common/pushdown/collection.cpp +++ b/ydb/library/yql/providers/common/pushdown/collection.cpp @@ -456,6 +456,23 @@ bool SqlInCanBePushed(const TCoSqlIn& sqlIn, const TExprNode* lambdaArg, const T return true; } +bool IsDistinctCanBePushed(const TExprBase& predicate, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) { + if (predicate.Ref().ChildrenSize() != 2 ) { + return false; + } + auto expr1 = TExprBase(predicate.Ref().Child(0)); + auto expr2 = TExprBase(predicate.Ref().Child(1)); + if (!CheckExpressionNodeForPushdown(expr1, lambdaArg, settings) + || !CheckExpressionNodeForPushdown(expr2, lambdaArg, settings)) { + return false; + } + if (!settings.IsEnabled(TSettings::EFeatureFlag::DoNotCheckCompareArgumentsTypes) + && !IsComparableTypes(expr1, expr2, false, lambdaBody.Ptr()->GetTypeAnn(), settings)) { + return false; + } + return true; +} + bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg, const TSettings& settings) { /* * There are three ways of comparison in following format: @@ -595,6 +612,9 @@ void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree } else if (settings.IsEnabled(TSettings::EFeatureFlag::InOperator) && predicate.Maybe<TCoSqlIn>()) { auto sqlIn = predicate.Cast<TCoSqlIn>(); predicateTree.CanBePushed = SqlInCanBePushed(sqlIn, lambdaArg, lambdaBody, settings); + } else if (settings.IsEnabled(TSettings::EFeatureFlag::IsDistinctOperator) && + (predicate.Ref().IsCallable({"IsNotDistinctFrom", "IsDistinctFrom"}))) { + predicateTree.CanBePushed = IsDistinctCanBePushed(predicate, lambdaArg, lambdaBody, settings); } else { predicateTree.CanBePushed = false; } diff --git a/ydb/library/yql/providers/common/pushdown/settings.h b/ydb/library/yql/providers/common/pushdown/settings.h index 1ee6abf8765..df0164b9d5f 100644 --- a/ydb/library/yql/providers/common/pushdown/settings.h +++ b/ydb/library/yql/providers/common/pushdown/settings.h @@ -26,7 +26,8 @@ struct TSettings { DoNotCheckCompareArgumentsTypes = 1 << 16, TimestampCtor = 1 << 17, JustPassthroughOperators = 1 << 18, // if + coalesce + just - InOperator = 1 << 19 // IN() + InOperator = 1 << 19, // IN() + IsDistinctOperator = 1 << 20 // IS NOT DISTINCT FROM / IS DISTINCT FROM }; explicit TSettings(NLog::EComponent logComponent) diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp index e08e09c9a54..7d86b754b73 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp @@ -193,6 +193,17 @@ namespace NYql { return true; } + bool SerializeIsNotDistinctFrom(const TExprBase& predicate, TPredicate* predicateProto, const TCoArgument& arg, TStringBuilder& err, bool invert) { + if (predicate.Ref().ChildrenSize() != 2) { + err << "unknown predicate, expected 2, children size " << predicate.Ref().ChildrenSize(); + return false; + } + TPredicate::TComparison* proto = predicateProto->mutable_comparison(); + proto->set_operation(!invert ? TPredicate::TComparison::IND : TPredicate::TComparison::ID); + return SerializeExpression(TExprBase(predicate.Ref().Child(0)), proto->mutable_left_value(), arg, err) + && SerializeExpression(TExprBase(predicate.Ref().Child(1)), proto->mutable_right_value(), arg, err); + } + bool SerializeAnd(const TCoAnd& andExpr, TPredicate* proto, const TCoArgument& arg, TStringBuilder& err, ui64 depth) { auto* dstProto = proto->mutable_conjunction(); for (const auto& child : andExpr.Ptr()->Children()) { @@ -251,6 +262,12 @@ namespace NYql { if (auto sqlIn = predicate.Maybe<TCoSqlIn>()) { return SerializeSqlIn(sqlIn.Cast(), proto, arg, err); } + if (predicate.Ref().IsCallable("IsNotDistinctFrom")) { + return SerializeIsNotDistinctFrom(predicate, proto, arg, err, false); + } + if (predicate.Ref().IsCallable("IsDistinctFrom")) { + return SerializeIsNotDistinctFrom(predicate, proto, arg, err, true); + } if (auto sqlIf = predicate.Maybe<TCoIf>()) { return SerializeSqlIf(sqlIf.Cast(), proto, arg, err, depth); } diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp index 7b3b22d50db..da8fa9923fd 100644 --- a/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp +++ b/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp @@ -30,7 +30,7 @@ namespace { : NPushdown::TSettings(NLog::EComponent::ProviderGeneric) { using EFlag = NPushdown::TSettings::EFeatureFlag; - Enable(EFlag::ExpressionAsPredicate | EFlag::ArithmeticalExpressions | EFlag::ImplicitConversionToInt64 | EFlag::StringTypes | EFlag::LikeOperator | EFlag::DoNotCheckCompareArgumentsTypes | EFlag::InOperator | EFlag::JustPassthroughOperators); + Enable(EFlag::ExpressionAsPredicate | EFlag::ArithmeticalExpressions | EFlag::ImplicitConversionToInt64 | EFlag::StringTypes | EFlag::LikeOperator | EFlag::DoNotCheckCompareArgumentsTypes | EFlag::InOperator | EFlag::IsDistinctOperator | EFlag::JustPassthroughOperators); } }; diff --git a/ydb/tests/fq/yds/test_row_dispatcher.py b/ydb/tests/fq/yds/test_row_dispatcher.py index 36e58b57bfe..3e59b894d8f 100644 --- a/ydb/tests/fq/yds/test_row_dispatcher.py +++ b/ydb/tests/fq/yds/test_row_dispatcher.py @@ -349,20 +349,20 @@ class TestPqRowDispatcher(TestYdsBase): self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `data` = \\"hello2\\"') filter = 'flag' self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `flag`') - # filter = ' event IS NOT DISTINCT FROM "event2"' - # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS NOT DISTINCT FROM \\"event2\\"') - # filter = ' event IS DISTINCT FROM "event1"' - # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS DISTINCT FROM \\"event1\\"') - # filter = ' field1 IS DISTINCT FROM field2' - # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `field1` IS DISTINCT FROM `field2`') + filter = ' event IS NOT DISTINCT FROM "event2"' + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS NOT DISTINCT FROM \\"event2\\"') + filter = ' event IS DISTINCT FROM "event1"' + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS DISTINCT FROM \\"event1\\"') + filter = ' field1 IS DISTINCT FROM field2' + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `field1` IS DISTINCT FROM `field2`') filter = 'event IN ("event2")' self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"event2\\")') filter = 'event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"1\\"') - # filter = ' event IS DISTINCT FROM data AND event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' - # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM `data` AND `event` IN (\\"1\\"') - # filter = ' IF(event == "event2", event IS DISTINCT FROM data, FALSE)' - # self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(`event` == "event2", `event` IS DISTINCT FROM `data`, FALSE)') + filter = ' event IS DISTINCT FROM data AND event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM `data` AND COALESCE(`event` IN (\\"1\\"') + filter = ' IF(event == "event2", event IS DISTINCT FROM data, FALSE)' + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(COALESCE(`event` = \\"event2\\", FALSE), `event` IS DISTINCT FROM `data`, FALSE)') filter = ' COALESCE(event = "event2", TRUE)' self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE(`event` = \\"event2\\", TRUE)') filter = ' COALESCE(event = "event2", data = "hello2", TRUE)' |