aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ziganshinmr <ziganshinmr@yandex-team.com> 2025-02-21 17:06:11 +0300
committer ziganshinmr <ziganshinmr@yandex-team.com> 2025-02-21 18:42:38 +0300
commit 538f420e8da04056c95eb9ed00c88f1691eddf0f (patch)
tree 038ddf7c73c4104c5d5acf07f29c8a3dbf5694c2
parent e59786e8682d59647cd75be70468b1bdc3a755c0 (diff)
download ydb-538f420e8da04056c95eb9ed00c88f1691eddf0f.tar.gz
KeyFilter2 pushdown fix
commit_hash:a2fbba8fa0771d8186ad495cd8ccae8731c8aa44
-rw-r--r-- yt/yql/providers/yt/common/yql_configuration.h 2
-rw-r--r-- yt/yql/providers/yt/common/yql_yt_settings.cpp 1
-rw-r--r-- yt/yql/providers/yt/common/yql_yt_settings.h 1
-rw-r--r-- yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h 1
-rw-r--r-- yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp 101
-rw-r--r-- yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt 3
-rw-r--r-- yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr 65
-rw-r--r-- yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt 3
-rw-r--r-- yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr 45
-rw-r--r-- yt/yql/tests/sql/suites/key_filter/yql-19420.cfg 2
-rw-r--r-- yt/yql/tests/sql/suites/key_filter/yql-19420.sql 20
11 files changed, 240 insertions, 4 deletions
diff --git a/yt/yql/providers/yt/common/yql_configuration.h b/yt/yql/providers/yt/common/yql_configuration.h
index 8347c784344..e4aaa3b2816 100644
--- a/yt/yql/providers/yt/common/yql_configuration.h
+++ b/yt/yql/providers/yt/common/yql_configuration.h
@@ -116,4 +116,6 @@ constexpr bool DEFAULT_ENABLE_DQ_WRITE_CONSTRAINTS = false;
constexpr bool DEFAULT_USE_QL_FILTER = false;
constexpr bool DEFAULT_PRUNE_QL_FILTER_LAMBDA = true;
+constexpr bool DEFAULT_DROP_UNUSED_KEYS_FROM_KEY_FILTER = false;
+
} // NYql
diff --git a/yt/yql/providers/yt/common/yql_yt_settings.cpp b/yt/yql/providers/yt/common/yql_yt_settings.cpp
index 03443e6409c..f173b462082 100644
--- a/yt/yql/providers/yt/common/yql_yt_settings.cpp
+++ b/yt/yql/providers/yt/common/yql_yt_settings.cpp
@@ -528,6 +528,7 @@ TYtConfiguration::TYtConfiguration(TTypeAnnotationContext& typeCtx)
REGISTER_SETTING(*this, JobBlockOutput).Parser([](const TString& v) { return FromString<EBlockOutputMode>(v); });
REGISTER_SETTING(*this, _EnableYtDqProcessWriteConstraints);
REGISTER_SETTING(*this, CompactForDistinct);
+ REGISTER_SETTING(*this, DropUnusedKeysFromKeyFilter);
}
EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings) {
diff --git a/yt/yql/providers/yt/common/yql_yt_settings.h b/yt/yql/providers/yt/common/yql_yt_settings.h
index 88b12afff22..0dfcbc8f568 100644
--- a/yt/yql/providers/yt/common/yql_yt_settings.h
+++ b/yt/yql/providers/yt/common/yql_yt_settings.h
@@ -302,6 +302,7 @@ struct TYtSettings {
NCommon::TConfSetting<TSet<NUdf::EDataSlot>, false> JobBlockOutputSupportedDataTypes;
NCommon::TConfSetting<bool, false> _EnableYtDqProcessWriteConstraints;
NCommon::TConfSetting<bool, false> CompactForDistinct;
+ NCommon::TConfSetting<bool, false> DropUnusedKeysFromKeyFilter;
};
EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings);
diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h
index 1b55458f8c7..c0780e072c0 100644
--- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h
+++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h
@@ -158,6 +158,7 @@ private:
TMaybe<bool> CanFuseLambdas(const NNodes::TCoLambda& innerLambda, const NNodes::TCoLambda& outerLambda, TExprContext& ctx) const;
+ NNodes::TExprBase RebuildKeyFilterAfterPushDown(NNodes::TExprBase filter, size_t usedKeysCount, TExprContext& ctx) const;
private:
const TYtState::TPtr State_;
diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp
index bf30c27f301..e2a1e815e30 100644
--- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp
+++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp
@@ -72,6 +72,60 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushMergeLimitToInput(T
.Done();
}
+TExprBase TYtPhysicalOptProposalTransformer::RebuildKeyFilterAfterPushDown(TExprBase filter, size_t usedKeysCount, TExprContext& ctx) const { // Wraps `filter` in a Map that shortens both bound tuples of every item to their first usedKeysCount elements plus the original last element.
+ auto origBoundTupleType = filter.Ref().GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TTupleExprType>()->GetItems()[0]->Cast<TTupleExprType>(); // filter is read as a list of tuples; element 0 of an item supplies the bound tuple type
+ auto origBoundTupleKeyCount = origBoundTupleType->GetSize() - 1; // index of the bound tuple's last element (presumably a non-key trailer such as an inclusion flag - TODO confirm)
+
+ auto origBoundTupleArg = ctx.NewArgument(filter.Pos(), "boundTuple"); // argument of the per-bound lambda assembled below
+ TExprNode::TListType newBoundTupleItems;
+ for (size_t i = 0; i < usedKeysCount; i++) { // keep the leading usedKeysCount elements, in order
+ newBoundTupleItems.push_back(
+ Build<TCoNth>(ctx, filter.Pos())
+ .Tuple(origBoundTupleArg)
+ .Index(ctx.NewAtom(filter.Pos(), i))
+ .Done()
+ .Ptr()
+ );
+ }
+ newBoundTupleItems.push_back( // then re-append the original last element; everything between is dropped
+ Build<TCoNth>(ctx, filter.Pos())
+ .Tuple(origBoundTupleArg)
+ .Index(ctx.NewAtom(filter.Pos(), origBoundTupleKeyCount))
+ .Done()
+ .Ptr()
+ );
+
+ auto handleBoundTuple = Build<TCoLambda>(ctx, filter.Pos()) // lambda: original bound tuple -> shortened bound tuple
+ .Args({origBoundTupleArg})
+ .Body<TExprList>()
+ .Add(std::move(newBoundTupleItems))
+ .Build()
+ .Done();
+
+ return Build<TCoMap>(ctx, filter.Pos()) // result: Map(filter, item -> (shorten(item.0), shorten(item.1)))
+ .Input(filter)
+ .Lambda<TCoLambda>()
+ .Args({"boundTuple"}) // NOTE: despite the name, this argument is a whole filter item (a tuple holding two bound tuples), not a single bound
+ .Body<TExprList>()
+ .Add<TExprApplier>() // first bound: element 0 of the item
+ .Apply(handleBoundTuple)
+ .With<TCoNth>(0)
+ .Tuple("boundTuple")
+ .Index(ctx.NewAtom(filter.Pos(), 0))
+ .Build()
+ .Build()
+ .Add<TExprApplier>() // second bound: element 1 of the item
+ .Apply(handleBoundTuple)
+ .With<TCoNth>(0)
+ .Tuple("boundTuple")
+ .Index(ctx.NewAtom(filter.Pos(), 1))
+ .Build()
+ .Build()
+ .Build()
+ .Build()
+ .Done();
+}
+
TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExprBase node, TExprContext& ctx) const {
if (node.Ref().HasResult() && node.Ref().GetResult().Type() != TExprNode::World) {
return node;
@@ -172,6 +226,9 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExp
const auto kfColumns = GetKeyFilterColumns(section, kfType);
YQL_ENSURE(!kfColumns.empty());
for (auto path: section.Paths()) {
+ TYtPathInfo pathInfo(path);
+ auto pathRowSpec = pathInfo.Table->RowSpec;
+
if (auto maybeOp = getInnerOpForUpdate(path, kfColumns)) {
auto innerOp = maybeOp.Cast();
if (kfType == EYtSettingType::KeyFilter2) {
@@ -203,10 +260,46 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExp
}
auto innerOpSection = innerOp.Input().Item(0);
- auto updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos())
- .InitFrom(innerOpSection)
- .Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *NYql::KeepOnlySettings(section.Settings().Ref(), EYtSettingType::KeyFilter | EYtSettingType::KeyFilter2, ctx), ctx))
- .Done();
+ TExprNode::TPtr updatedSection;
+ if (kfType == EYtSettingType::KeyFilter2 && State_->Configuration->DropUnusedKeysFromKeyFilter.Get().GetOrElse(DEFAULT_DROP_UNUSED_KEYS_FROM_KEY_FILTER)) {
+ for (auto innerOpPath: innerOpSection.Paths()) {
+ TYtPathInfo innerOpPathInfo(innerOpPath);
+ auto innerOpPathRowSpec = innerOpPathInfo.Table->RowSpec;
+
+ YQL_ENSURE(kfColumns.size() <= innerOpPathRowSpec->SortedBy.size());
+ for (size_t i = 0; i < kfColumns.size(); i++) {
+ YQL_ENSURE(innerOpPathRowSpec->SortedBy[i] == pathRowSpec->SortedBy[i]);
+ }
+ }
+
+ TExprNode::TListType rebuiltKeyFilters;
+ for (auto filter : keyFilters) {
+ YQL_ENSURE(filter->ChildrenSize() == 2);
+ auto rebuiltFilter = RebuildKeyFilterAfterPushDown(TExprBase(filter->HeadPtr()), kfColumns.size(), ctx);
+ rebuiltKeyFilters.push_back(Build<TCoNameValueTuple>(ctx, innerOpSection.Settings().Pos())
+ .Name().Build("keyFilter2")
+ .Value<TExprList>()
+ .Add(rebuiltFilter)
+ .Add(filter->Child(1))
+ .Build()
+ .Done()
+ .Ptr()
+ );
+ }
+
+ updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos())
+ .InitFrom(innerOpSection)
+ .Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *ctx.NewList(innerOpSection.Settings().Pos(), std::move(rebuiltKeyFilters)), ctx))
+ .Done()
+ .Ptr();
+
+ } else {
+ updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos())
+ .InitFrom(innerOpSection)
+ .Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *NYql::KeepOnlySettings(section.Settings().Ref(), EYtSettingType::KeyFilter | EYtSettingType::KeyFilter2, ctx), ctx))
+ .Done()
+ .Ptr();
+ }
auto updatedSectionList = Build<TYtSectionList>(ctx, innerOp.Input().Pos()).Add(updatedSection).Done();
auto updatedInnerOp = ctx.ChangeChild(innerOp.Ref(), TYtTransientOpBase::idx_Input, updatedSectionList.Ptr());
diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt
new file mode 100644
index 00000000000..4c10c06cf33
--- /dev/null
+++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt
@@ -0,0 +1,3 @@
+{"key1"="1";"key2"="aaa";"key3"=1};
+{"key1"="2";"key2"="aab";"key3"=2};
+{"key1"="3";"key2"=#;"key3"=3};
diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr
new file mode 100644
index 00000000000..2fbe20d866b
--- /dev/null
+++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr
@@ -0,0 +1,65 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key1";
+ [
+ "DataType";
+ "String";
+ ];
+ ];
+ [
+ "key2";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String";
+ ];
+ ];
+ ];
+ [
+ "key3";
+ [
+ "DataType";
+ "Int32";
+ ];
+ ];
+ ];
+ ];
+ "SortDirections" = [
+ 1;
+ 1;
+ 1;
+ ];
+ "SortMembers" = [
+ "key1";
+ "key2";
+ "key3";
+ ];
+ "SortedBy" = [
+ "key1";
+ "key2";
+ "key3";
+ ];
+ "SortedByTypes" = [
+ [
+ "DataType";
+ "String";
+ ];
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String";
+ ];
+ ];
+ [
+ "DataType";
+ "Int32";
+ ];
+ ];
+ }
+}
diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt
new file mode 100644
index 00000000000..1b5d541c5ce
--- /dev/null
+++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt
@@ -0,0 +1,3 @@
+{"key1"="4";"key2"="aad";};
+{"key1"="5";"key2"="aae";};
+{"key1"="6";"key2"="aaf";};
diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr
new file mode 100644
index 00000000000..4e51119e466
--- /dev/null
+++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr
@@ -0,0 +1,45 @@
+{
+ "_yql_row_spec" = {
+ "Type" = [
+ "StructType";
+ [
+ [
+ "key1";
+ [
+ "DataType";
+ "String";
+ ];
+ ];
+ [
+ "key2";
+ [
+ "DataType";
+ "String";
+ ];
+ ];
+ ];
+ ];
+ "SortDirections" = [
+ 1;
+ 1;
+ ];
+ "SortMembers" = [
+ "key1";
+ "key2";
+ ];
+ "SortedBy" = [
+ "key1";
+ "key2";
+ ];
+ "SortedByTypes" = [
+ [
+ "DataType";
+ "String";
+ ];
+ [
+ "DataType";
+ "String";
+ ];
+ ];
+ }
+}
diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420.cfg b/yt/yql/tests/sql/suites/key_filter/yql-19420.cfg
new file mode 100644
index 00000000000..ea8b3ac173a
--- /dev/null
+++ b/yt/yql/tests/sql/suites/key_filter/yql-19420.cfg
@@ -0,0 +1,2 @@
+in Input1 yql-19420-input1.txt
+in Input2 yql-19420-input2.txt
diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420.sql b/yt/yql/tests/sql/suites/key_filter/yql-19420.sql
new file mode 100644
index 00000000000..8b823a08865
--- /dev/null
+++ b/yt/yql/tests/sql/suites/key_filter/yql-19420.sql
@@ -0,0 +1,20 @@
+/* postgres can not */
+USE plato;
+
+PRAGMA yt.DropUnusedKeysFromKeyFilter="1"; -- turn on the setting under test; its compiled-in default is false
+
+$src = (
+ SELECT
+ key1,
+ key2
+ FROM RANGE("", "Input1", "Input2") -- Input1 is sorted by key1, key2, key3; Input2 only by key1, key2
+);
+
+SELECT
+ key2
+FROM $src
+WHERE key1 BETWEEN '2' AND '5'; -- range predicate on key1, the leading sort column of both inputs
+
+SELECT
+ key2
+FROM $src; -- second, unfiltered read of the same $src