aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpavelvelikhov <pavelvelikhov@ydb.tech>2023-12-11 15:39:20 +0300
committerpavelvelikhov <pavelvelikhov@ydb.tech>2023-12-11 17:13:40 +0300
commit73482b1b1fe9fb375b188bccb8c03d43035b9117 (patch)
tree45b3fa475082b8110ac93c8834aa2cdfc1b8f2f7
parent892cdd564988d4e52307bdec58a5b7be336b91cd (diff)
downloadydb-73482b1b1fe9fb375b188bccb8c03d43035b9117.tar.gz
Added support for optional values in predicate selectivity
Added support for optional values in predicate selectivity
-rw-r--r--ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp79
-rw-r--r--ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt1
-rw-r--r--ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/core/kqp/ut/common/datetime2_udf.cpp9
-rw-r--r--ydb/core/kqp/ut/common/ya.make1
-rw-r--r--ydb/core/kqp/ut/join/kqp_join_order_ut.cpp31
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp40
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.cpp79
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.h2
12 files changed, 132 insertions, 114 deletions
diff --git a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp
index 73de91941e..4736895c9d 100644
--- a/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp
@@ -1,5 +1,6 @@
#include "kqp_constant_folding_transformer.h"
+#include <ydb/library/yql/dq/opt/dq_opt_stat.h>
#include <ydb/library/yql/utils/log/log.h>
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
@@ -9,84 +10,6 @@ using namespace NKikimr::NKqp;
using namespace NYql::NDq;
namespace {
-
- /***
- * We maintain a white list of callables that we consider part of constant expressions
- * All other callables will not be evaluated
- */
- THashSet<TString> constantFoldingWhiteList = {
- "Concat", "Just", "Optional","SafeCast",
- "+", "-", "*", "/", "%"};
-
- bool NeedCalc(NNodes::TExprBase node) {
- auto type = node.Ref().GetTypeAnn();
- if (type->IsSingleton()) {
- return false;
- }
-
- if (type->GetKind() == ETypeAnnotationKind::Optional) {
- if (node.Maybe<TCoNothing>()) {
- return false;
- }
- if (auto maybeJust = node.Maybe<TCoJust>()) {
- return NeedCalc(maybeJust.Cast().Input());
- }
- return true;
- }
-
- if (type->GetKind() == ETypeAnnotationKind::Tuple) {
- if (auto maybeTuple = node.Maybe<TExprList>()) {
- return AnyOf(maybeTuple.Cast(), [](const auto& item) { return NeedCalc(item); });
- }
- return true;
- }
-
- if (type->GetKind() == ETypeAnnotationKind::List) {
- if (node.Maybe<TCoList>()) {
- YQL_ENSURE(node.Ref().ChildrenSize() == 1, "Should be rewritten to AsList");
- return false;
- }
- if (auto maybeAsList = node.Maybe<TCoAsList>()) {
- return AnyOf(maybeAsList.Cast().Args(), [](const auto& item) { return NeedCalc(NNodes::TExprBase(item)); });
- }
- return true;
- }
-
- YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Data,
- "Object of type " << *type << " should not be considered for calculation");
-
- return !node.Maybe<TCoDataCtor>();
- }
-
- /***
- * Check if the expression is a constant expression
- * Its type annotation need to specify that its a data type, and then we check:
- * - If its a literal, its a constant expression
- * - If its a callable in the while list and all children are constant expressions, then its a constant expression
- * - If one of the child is a type expression, it also passes the check
- */
- bool IsConstantExpr(const TExprNode::TPtr& input) {
- if (!IsDataOrOptionalOfData(input->GetTypeAnn())) {
- return false;
- }
-
- if (!NeedCalc(TExprBase(input))) {
- return true;
- }
-
- else if (input->IsCallable(constantFoldingWhiteList)) {
- for (size_t i = 0; i < input->ChildrenSize(); i++) {
- auto callableInput = input->Child(i);
- if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) {
- return false;
- }
- }
- return true;
- }
-
- return false;
- }
-
/**
* Traverse a lambda and create a mapping from nodes to nodes wrapped in EvaluateExpr callable
* We check for literals specifically, since they shouldn't be evaluated
diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt
index c10448c806..c2759015fa 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt
@@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt
index c10448c806..c2759015fa 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt
@@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt
index dcc6b6dfc5..6321770afa 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt
@@ -34,4 +34,5 @@ target_sources(kqp-ut-common PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt
index dcc6b6dfc5..6321770afa 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt
@@ -34,4 +34,5 @@ target_sources(kqp-ut-common PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt
index c10448c806..c2759015fa 100644
--- a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt
@@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
)
diff --git a/ydb/core/kqp/ut/common/datetime2_udf.cpp b/ydb/core/kqp/ut/common/datetime2_udf.cpp
new file mode 100644
index 0000000000..a372750f01
--- /dev/null
+++ b/ydb/core/kqp/ut/common/datetime2_udf.cpp
@@ -0,0 +1,9 @@
+#include <ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp>
+
+namespace NKikimr::NKqp {
+
+NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateDateTime2Module() {
+ return new ::TDateTime2Module();
+}
+
+} // namespace NKikimr::NKqp
\ No newline at end of file
diff --git a/ydb/core/kqp/ut/common/ya.make b/ydb/core/kqp/ut/common/ya.make
index 2b377f5163..d42a4a94e5 100644
--- a/ydb/core/kqp/ut/common/ya.make
+++ b/ydb/core/kqp/ut/common/ya.make
@@ -7,6 +7,7 @@ SRCS(
re2_udf.cpp
string_udf.cpp
columnshard.cpp
+ datetime2_udf.cpp
)
PEERDIR(
diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
index 2aad2e7654..f2e780e406 100644
--- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
+++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
@@ -19,6 +19,7 @@ static void CreateSampleTable(TSession session) {
CREATE TABLE `/Root/R` (
id Int32,
payload1 String,
+ ts Date,
PRIMARY KEY (id)
);
)").GetValueSync().IsSuccess());
@@ -57,8 +58,8 @@ static void CreateSampleTable(TSession session) {
UNIT_ASSERT(session.ExecuteDataQuery(R"(
- REPLACE INTO `/Root/R` (id, payload1) VALUES
- (1, "blah");
+ REPLACE INTO `/Root/R` (id, payload1, ts) VALUES
+ (1, "blah", CAST("1998-12-01" AS Date) );
REPLACE INTO `/Root/S` (id, payload2) VALUES
(1, "blah");
@@ -431,6 +432,32 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
Cout << result.GetPlan();
}
}
+
+ Y_UNIT_TEST(DatetimeConstantFold) {
+
+ auto kikimr = GetKikimrWithJoinSettings();
+ auto db = kikimr.GetTableClient();
+ auto session = db.CreateSession().GetValueSync().GetSession();
+
+ CreateSampleTable(session);
+
+ /* filter comparing a column against a constant datetime expression */
+ {
+ const TString query = Q_(R"(
+ SELECT *
+ FROM `/Root/R` as R
+ WHERE CAST(R.ts AS Timestamp) = (CAST('1998-12-01' AS Date) - Interval("P100D"))
+ )");
+
+ auto result = session.ExplainDataQuery(query).ExtractValueSync();
+
+ UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS);
+
+ NJson::TJsonValue plan;
+ NJson::ReadJsonTree(result.GetPlan(), &plan, true);
+ Cout << result.GetPlan();
+ }
+ }
}
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp b/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp
index 8ed335e94d..e3a02268f0 100644
--- a/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp
@@ -8,8 +8,6 @@ using namespace NYql::NNodes;
namespace {
- THashSet<TString> exprCallables = {"SafeCast"};
-
/**
* Check if a callable is an attribute of some table
* Currently just return a boolean and cover only basic cases
@@ -22,34 +20,8 @@ namespace {
return IsAttribute(cast.Cast().Value(), attributeName);
} else if (auto ifPresent = input.Maybe<TCoIfPresent>()) {
return IsAttribute(ifPresent.Cast().Optional(), attributeName);
- }
-
- return false;
- }
-
- /**
- * Check that the expression is a constant expression
- * We use a whitelist of callables
- */
- bool IsConstant(const TExprBase& input) {
- if (input.Maybe<TCoDataCtor>()){
- return true;
- } else if (input.Ref().IsCallable(exprCallables)) {
- if (input.Ref().ChildrenSize() >= 1) {
- for (size_t i = 0; i < input.Ref().ChildrenSize(); i++) {
- auto callableInput = TExprBase(input.Ref().Child(i));
- if (!IsConstant(callableInput)) {
- return false;
- }
- }
- return true;
- } else {
- return false;
- }
- } else if (auto op = input.Maybe<TCoBinaryArithmetic>()) {
- auto left = op.Cast().Left();
- auto right = op.Cast().Right();
- return IsConstant(left) && IsConstant(right);
+ } else if (auto just = input.Maybe<TCoJust>()) {
+ return IsAttribute(just.Cast().Input(), attributeName);
}
return false;
@@ -100,7 +72,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
TString attributeName;
- if (IsAttribute(right, attributeName) && IsConstant(left)) {
+ if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
std::swap(left, right);
}
@@ -114,7 +86,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
// In case the right side is a constant that can be extracted, compute the selectivity using statistics
// Currently, with the basic statistics we just return 1/nRows
- else if (IsConstant(right)) {
+ else if (IsConstantExpr(right.Ptr())) {
if (stats->KeyColumns.size()==1 && attributeName==stats->KeyColumns[0]) {
if (stats->Nrows > 1) {
result = 1.0 / stats->Nrows;
@@ -141,7 +113,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
auto right = comparison.Cast().Right();
TString attributeName;
- if (IsAttribute(right, attributeName) && IsConstant(left)) {
+ if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
std::swap(left, right);
}
@@ -152,7 +124,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
}
// In case the right side is a constant that can be extracted, compute the selectivity using statistics
// Currently, with the basic statistics we just return 0.5
- else if (IsConstant(right)) {
+ else if (IsConstantExpr(right.Ptr())) {
result = 0.5;
}
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 99516169b0..755b60dc0c 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -3,12 +3,91 @@
#include <ydb/library/yql/core/yql_opt_utils.h>
#include <ydb/library/yql/core/yql_cost_function.h>
#include <ydb/library/yql/utils/log/log.h>
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
namespace NYql::NDq {
using namespace NNodes;
+namespace {
+ /***
+ * We maintain a white list of callables that we consider part of constant expressions
+ * All other callables will not be evaluated
+ */
+ THashSet<TString> constantFoldingWhiteList = {
+ "Concat", "Just", "Optional","SafeCast",
+ "+", "-", "*", "/", "%"};
+}
+
+bool NeedCalc(NNodes::TExprBase node) {
+ auto type = node.Ref().GetTypeAnn();
+ if (type->IsSingleton()) {
+ return false;
+ }
+
+ if (type->GetKind() == ETypeAnnotationKind::Optional) {
+ if (node.Maybe<TCoNothing>()) {
+ return false;
+ }
+ if (auto maybeJust = node.Maybe<TCoJust>()) {
+ return NeedCalc(maybeJust.Cast().Input());
+ }
+ return true;
+ }
+
+ if (type->GetKind() == ETypeAnnotationKind::Tuple) {
+ if (auto maybeTuple = node.Maybe<TExprList>()) {
+ return AnyOf(maybeTuple.Cast(), [](const auto& item) { return NeedCalc(item); });
+ }
+ return true;
+ }
+
+ if (type->GetKind() == ETypeAnnotationKind::List) {
+ if (node.Maybe<TCoList>()) {
+ YQL_ENSURE(node.Ref().ChildrenSize() == 1, "Should be rewritten to AsList");
+ return false;
+ }
+ if (auto maybeAsList = node.Maybe<TCoAsList>()) {
+ return AnyOf(maybeAsList.Cast().Args(), [](const auto& item) { return NeedCalc(NNodes::TExprBase(item)); });
+ }
+ return true;
+ }
+
+ YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Data,
+ "Object of type " << *type << " should not be considered for calculation");
+
+ return !node.Maybe<TCoDataCtor>();
+}
+
+/***
+ * Check if the expression is a constant expression
+ * Its type annotation needs to specify that it's a data type, and then we check:
+ * - If it's a literal, it's a constant expression
+ * - If it's a callable in the white list and all children are constant expressions, then it's a constant expression
+ * - If one of the children is a type expression, that child also passes the check
+ */
+bool IsConstantExpr(const TExprNode::TPtr& input) {
+ if (!IsDataOrOptionalOfData(input->GetTypeAnn())) {
+ return false;
+ }
+
+ if (!NeedCalc(TExprBase(input))) {
+ return true;
+ }
+
+ else if (input->IsCallable(constantFoldingWhiteList)) {
+ for (size_t i = 0; i < input->ChildrenSize(); i++) {
+ auto callableInput = input->Child(i);
+ if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
/**
* Compute statistics for map join
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h
index 81c6000947..7a5f954276 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.h
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.h
@@ -17,5 +17,7 @@ void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationCon
void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
double ComputePredicateSelectivity(const NNodes::TExprBase& input, const std::shared_ptr<TOptimizerStatistics>& stats);
+bool NeedCalc(NNodes::TExprBase node);
+bool IsConstantExpr(const TExprNode::TPtr& input);
} // namespace NYql::NDq {