diff options
author | pavelvelikhov <pavelvelikhov@ydb.tech> | 2023-12-11 15:39:20 +0300 |
---|---|---|
committer | pavelvelikhov <pavelvelikhov@ydb.tech> | 2023-12-11 17:13:40 +0300 |
commit | 73482b1b1fe9fb375b188bccb8c03d43035b9117 (patch) | |
tree | 45b3fa475082b8110ac93c8834aa2cdfc1b8f2f7 | |
parent | 892cdd564988d4e52307bdec58a5b7be336b91cd (diff) | |
download | ydb-73482b1b1fe9fb375b188bccb8c03d43035b9117.tar.gz |
Added support for optional for predicate selectivity
Added support for optional for predicate selectivity
-rw-r--r-- | ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp | 79 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/datetime2_udf.cpp | 9 | ||||
-rw-r--r-- | ydb/core/kqp/ut/common/ya.make | 1 | ||||
-rw-r--r-- | ydb/core/kqp/ut/join/kqp_join_order_ut.cpp | 31 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp | 40 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.cpp | 79 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.h | 2 |
12 files changed, 132 insertions, 114 deletions
diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp index 73de91941e..4736895c9d 100644 --- a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp @@ -1,5 +1,6 @@ #include "kqp_constant_folding_transformer.h" +#include <ydb/library/yql/dq/opt/dq_opt_stat.h> #include <ydb/library/yql/utils/log/log.h> #include <ydb/library/yql/core/yql_expr_type_annotation.h> @@ -9,84 +10,6 @@ using namespace NKikimr::NKqp; using namespace NYql::NDq; namespace { - - /*** - * We maintain a white list of callables that we consider part of constant expressions - * All other callables will not be evaluated - */ - THashSet<TString> constantFoldingWhiteList = { - "Concat", "Just", "Optional","SafeCast", - "+", "-", "*", "/", "%"}; - - bool NeedCalc(NNodes::TExprBase node) { - auto type = node.Ref().GetTypeAnn(); - if (type->IsSingleton()) { - return false; - } - - if (type->GetKind() == ETypeAnnotationKind::Optional) { - if (node.Maybe<TCoNothing>()) { - return false; - } - if (auto maybeJust = node.Maybe<TCoJust>()) { - return NeedCalc(maybeJust.Cast().Input()); - } - return true; - } - - if (type->GetKind() == ETypeAnnotationKind::Tuple) { - if (auto maybeTuple = node.Maybe<TExprList>()) { - return AnyOf(maybeTuple.Cast(), [](const auto& item) { return NeedCalc(item); }); - } - return true; - } - - if (type->GetKind() == ETypeAnnotationKind::List) { - if (node.Maybe<TCoList>()) { - YQL_ENSURE(node.Ref().ChildrenSize() == 1, "Should be rewritten to AsList"); - return false; - } - if (auto maybeAsList = node.Maybe<TCoAsList>()) { - return AnyOf(maybeAsList.Cast().Args(), [](const auto& item) { return NeedCalc(NNodes::TExprBase(item)); }); - } - return true; - } - - YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Data, - "Object of type " << *type << " should not be considered for calculation"); - - return !node.Maybe<TCoDataCtor>(); - } - - /*** - * Check if the expression is a constant expression - * Its type annotation need to specify that its a data type, and then we check: - * - If its a literal, its a constant expression - * - If its a callable in the while list and all children are constant expressions, then its a constant expression - * - If one of the child is a type expression, it also passes the check - */ - bool IsConstantExpr(const TExprNode::TPtr& input) { - if (!IsDataOrOptionalOfData(input->GetTypeAnn())) { - return false; - } - - if (!NeedCalc(TExprBase(input))) { - return true; - } - - else if (input->IsCallable(constantFoldingWhiteList)) { - for (size_t i = 0; i < input->ChildrenSize(); i++) { - auto callableInput = input->Child(i); - if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) { - return false; - } - } - return true; - } - - return false; - } - /** * Traverse a lambda and create a mapping from nodes to nodes wrapped in EvaluateExpr callable * We check for literals specifically, since they shouldn't be evaluated diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt index c10448c806..c2759015fa 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt @@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt index c10448c806..c2759015fa 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt @@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt index dcc6b6dfc5..6321770afa 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt @@ -34,4 +34,5 @@ target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt index dcc6b6dfc5..6321770afa 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt @@ -34,4 +34,5 @@ target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt index c10448c806..c2759015fa 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt @@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/datetime2_udf.cpp b/ydb/core/kqp/ut/common/datetime2_udf.cpp new file mode 100644 index 0000000000..a372750f01 --- /dev/null +++ b/ydb/core/kqp/ut/common/datetime2_udf.cpp @@ -0,0 +1,9 @@ +#include <ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp> + +namespace NKikimr::NKqp { + +NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateDateTime2Module() { + return new ::TDateTime2Module(); +} + +} // namespace NKikimr::NKqp
\ No newline at end of file diff --git a/ydb/core/kqp/ut/common/ya.make b/ydb/core/kqp/ut/common/ya.make index 2b377f5163..d42a4a94e5 100644 --- a/ydb/core/kqp/ut/common/ya.make +++ b/ydb/core/kqp/ut/common/ya.make @@ -7,6 +7,7 @@ SRCS( re2_udf.cpp string_udf.cpp columnshard.cpp + datetime2_udf.cpp ) PEERDIR( diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index 2aad2e7654..f2e780e406 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -19,6 +19,7 @@ static void CreateSampleTable(TSession session) { CREATE TABLE `/Root/R` ( id Int32, payload1 String, + ts Date, PRIMARY KEY (id) ); )").GetValueSync().IsSuccess()); @@ -57,8 +58,8 @@ static void CreateSampleTable(TSession session) { UNIT_ASSERT(session.ExecuteDataQuery(R"( - REPLACE INTO `/Root/R` (id, payload1) VALUES - (1, "blah"); + REPLACE INTO `/Root/R` (id, payload1, ts) VALUES + (1, "blah", CAST("1998-12-01" AS Date) ); REPLACE INTO `/Root/S` (id, payload2) VALUES (1, "blah"); @@ -431,6 +432,32 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) { Cout << result.GetPlan(); } } + + Y_UNIT_TEST(DatetimeConstantFold) { + + auto kikimr = GetKikimrWithJoinSettings(); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + CreateSampleTable(session); + + /* join with parameters */ + { + const TString query = Q_(R"( + SELECT * + FROM `/Root/R` as R + WHERE CAST(R.ts AS Timestamp) = (CAST('1998-12-01' AS Date) - Interval("P100D")) + )"); + + auto result = session.ExplainDataQuery(query).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + + NJson::TJsonValue plan; + NJson::ReadJsonTree(result.GetPlan(), &plan, true); + Cout << result.GetPlan(); + } + } } } diff --git a/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp b/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp index 8ed335e94d..e3a02268f0 100644 --- a/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp @@ -8,8 +8,6 @@ using namespace NYql::NNodes; namespace { - THashSet<TString> exprCallables = {"SafeCast"}; - /** * Check if a callable is an attribute of some table * Currently just return a boolean and cover only basic cases @@ -22,34 +20,8 @@ namespace { return IsAttribute(cast.Cast().Value(), attributeName); } else if (auto ifPresent = input.Maybe<TCoIfPresent>()) { return IsAttribute(ifPresent.Cast().Optional(), attributeName); - } - - return false; - } - - /** - * Check that the expression is a constant expression - * We use a whitelist of callables - */ - bool IsConstant(const TExprBase& input) { - if (input.Maybe<TCoDataCtor>()){ - return true; - } else if (input.Ref().IsCallable(exprCallables)) { - if (input.Ref().ChildrenSize() >= 1) { - for (size_t i = 0; i < input.Ref().ChildrenSize(); i++) { - auto callableInput = TExprBase(input.Ref().Child(i)); - if (!IsConstant(callableInput)) { - return false; - } - } - return true; - } else { - return false; - } - } else if (auto op = input.Maybe<TCoBinaryArithmetic>()) { - auto left = op.Cast().Left(); - auto right = op.Cast().Right(); - return IsConstant(left) && IsConstant(right); + } else if (auto just = input.Maybe<TCoJust>()) { + return IsAttribute(just.Cast().Input(), attributeName); } return false; @@ -100,7 +72,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std: TString attributeName; - if (IsAttribute(right, attributeName) && IsConstant(left)) { + if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) { std::swap(left, right); } @@ -114,7 +86,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std: // In case the right side is a constant that can be extracted, compute the selectivity using statistics // Currently, with the basic statistics we just return 1/nRows - else if (IsConstant(right)) { + else if (IsConstantExpr(right.Ptr())) { if (stats->KeyColumns.size()==1 && attributeName==stats->KeyColumns[0]) { if (stats->Nrows > 1) { result = 1.0 / stats->Nrows; @@ -141,7 +113,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std: auto right = comparison.Cast().Right(); TString attributeName; - if (IsAttribute(right, attributeName) && IsConstant(left)) { + if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) { std::swap(left, right); } @@ -152,7 +124,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std: } // In case the right side is a constant that can be extracted, compute the selectivity using statistics // Currently, with the basic statistics we just return 0.5 - else if (IsConstant(right)) { + else if (IsConstantExpr(right.Ptr())) { result = 0.5; } } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 99516169b0..755b60dc0c 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -3,12 +3,91 @@ #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/core/yql_cost_function.h> #include <ydb/library/yql/utils/log/log.h> +#include <ydb/library/yql/core/yql_expr_type_annotation.h> namespace NYql::NDq { using namespace NNodes; +namespace { + /*** + * We maintain a white list of callables that we consider part of constant expressions + * All other callables will not be evaluated + */ + THashSet<TString> constantFoldingWhiteList = { + "Concat", "Just", "Optional","SafeCast", + "+", "-", "*", "/", "%"}; +} + +bool NeedCalc(NNodes::TExprBase node) { + auto type = node.Ref().GetTypeAnn(); + if (type->IsSingleton()) { + return false; + } + + if (type->GetKind() == ETypeAnnotationKind::Optional) { + if (node.Maybe<TCoNothing>()) { + return false; + } + if (auto maybeJust = node.Maybe<TCoJust>()) { + return NeedCalc(maybeJust.Cast().Input()); + } + return true; + } + + if (type->GetKind() == ETypeAnnotationKind::Tuple) { + if (auto maybeTuple = node.Maybe<TExprList>()) { + return AnyOf(maybeTuple.Cast(), [](const auto& item) { return NeedCalc(item); }); + } + return true; + } + + if (type->GetKind() == ETypeAnnotationKind::List) { + if (node.Maybe<TCoList>()) { + YQL_ENSURE(node.Ref().ChildrenSize() == 1, "Should be rewritten to AsList"); + return false; + } + if (auto maybeAsList = node.Maybe<TCoAsList>()) { + return AnyOf(maybeAsList.Cast().Args(), [](const auto& item) { return NeedCalc(NNodes::TExprBase(item)); }); + } + return true; + } + + YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Data, + "Object of type " << *type << " should not be considered for calculation"); + + return !node.Maybe<TCoDataCtor>(); +} + +/*** + * Check if the expression is a constant expression + * Its type annotation need to specify that its a data type, and then we check: + * - If its a literal, its a constant expression + * - If its a callable in the while list and all children are constant expressions, then its a constant expression + * - If one of the child is a type expression, it also passes the check + */ +bool IsConstantExpr(const TExprNode::TPtr& input) { + if (!IsDataOrOptionalOfData(input->GetTypeAnn())) { + return false; + } + + if (!NeedCalc(TExprBase(input))) { + return true; + } + + else if (input->IsCallable(constantFoldingWhiteList)) { + for (size_t i = 0; i < input->ChildrenSize(); i++) { + auto callableInput = input->Child(i); + if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) { + return false; + } + } + return true; + } + + return false; +} /** * Compute statistics for map join diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h index 81c6000947..7a5f954276 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.h +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -17,5 +17,7 @@ void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationCon void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); double ComputePredicateSelectivity(const NNodes::TExprBase& input, const std::shared_ptr<TOptimizerStatistics>& stats); +bool NeedCalc(NNodes::TExprBase node); +bool IsConstantExpr(const TExprNode::TPtr& input); } // namespace NYql::NDq { |