diff options
author | ziganshinmr <ziganshinmr@yandex-team.com> | 2025-02-21 17:06:11 +0300 |
---|---|---|
committer | ziganshinmr <ziganshinmr@yandex-team.com> | 2025-02-21 18:42:38 +0300 |
commit | 538f420e8da04056c95eb9ed00c88f1691eddf0f (patch) | |
tree | 038ddf7c73c4104c5d5acf07f29c8a3dbf5694c2 | |
parent | e59786e8682d59647cd75be70468b1bdc3a755c0 (diff) | |
download | ydb-538f420e8da04056c95eb9ed00c88f1691eddf0f.tar.gz |
KeyFilter2 pushdown fix
commit_hash:a2fbba8fa0771d8186ad495cd8ccae8731c8aa44
11 files changed, 240 insertions, 4 deletions
diff --git a/yt/yql/providers/yt/common/yql_configuration.h b/yt/yql/providers/yt/common/yql_configuration.h index 8347c784344..e4aaa3b2816 100644 --- a/yt/yql/providers/yt/common/yql_configuration.h +++ b/yt/yql/providers/yt/common/yql_configuration.h @@ -116,4 +116,6 @@ constexpr bool DEFAULT_ENABLE_DQ_WRITE_CONSTRAINTS = false; constexpr bool DEFAULT_USE_QL_FILTER = false; constexpr bool DEFAULT_PRUNE_QL_FILTER_LAMBDA = true; +constexpr bool DEFAULT_DROP_UNUSED_KEYS_FROM_KEY_FILTER = false; + } // NYql diff --git a/yt/yql/providers/yt/common/yql_yt_settings.cpp b/yt/yql/providers/yt/common/yql_yt_settings.cpp index 03443e6409c..f173b462082 100644 --- a/yt/yql/providers/yt/common/yql_yt_settings.cpp +++ b/yt/yql/providers/yt/common/yql_yt_settings.cpp @@ -528,6 +528,7 @@ TYtConfiguration::TYtConfiguration(TTypeAnnotationContext& typeCtx) REGISTER_SETTING(*this, JobBlockOutput).Parser([](const TString& v) { return FromString<EBlockOutputMode>(v); }); REGISTER_SETTING(*this, _EnableYtDqProcessWriteConstraints); REGISTER_SETTING(*this, CompactForDistinct); + REGISTER_SETTING(*this, DropUnusedKeysFromKeyFilter); } EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings) { diff --git a/yt/yql/providers/yt/common/yql_yt_settings.h b/yt/yql/providers/yt/common/yql_yt_settings.h index 88b12afff22..0dfcbc8f568 100644 --- a/yt/yql/providers/yt/common/yql_yt_settings.h +++ b/yt/yql/providers/yt/common/yql_yt_settings.h @@ -302,6 +302,7 @@ struct TYtSettings { NCommon::TConfSetting<TSet<NUdf::EDataSlot>, false> JobBlockOutputSupportedDataTypes; NCommon::TConfSetting<bool, false> _EnableYtDqProcessWriteConstraints; NCommon::TConfSetting<bool, false> CompactForDistinct; + NCommon::TConfSetting<bool, false> DropUnusedKeysFromKeyFilter; }; EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings); diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h index 1b55458f8c7..c0780e072c0 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h @@ -158,6 +158,7 @@ private: TMaybe<bool> CanFuseLambdas(const NNodes::TCoLambda& innerLambda, const NNodes::TCoLambda& outerLambda, TExprContext& ctx) const; + NNodes::TExprBase RebuildKeyFilterAfterPushDown(NNodes::TExprBase filter, size_t usedKeysCount, TExprContext& ctx) const; private: const TYtState::TPtr State_; diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp index bf30c27f301..e2a1e815e30 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_push.cpp @@ -72,6 +72,60 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushMergeLimitToInput(T .Done(); } +TExprBase TYtPhysicalOptProposalTransformer::RebuildKeyFilterAfterPushDown(TExprBase filter, size_t usedKeysCount, TExprContext& ctx) const { + auto origBoundTupleType = filter.Ref().GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TTupleExprType>()->GetItems()[0]->Cast<TTupleExprType>(); + auto origBoundTupleKeyCount = origBoundTupleType->GetSize() - 1; + + auto origBoundTupleArg = ctx.NewArgument(filter.Pos(), "boundTuple"); + TExprNode::TListType newBoundTupleItems; + for (size_t i = 0; i < usedKeysCount; i++) { + newBoundTupleItems.push_back( + Build<TCoNth>(ctx, filter.Pos()) + .Tuple(origBoundTupleArg) + .Index(ctx.NewAtom(filter.Pos(), i)) + .Done() + .Ptr() + ); + } + newBoundTupleItems.push_back( + Build<TCoNth>(ctx, filter.Pos()) + .Tuple(origBoundTupleArg) + .Index(ctx.NewAtom(filter.Pos(), origBoundTupleKeyCount)) + .Done() + .Ptr() + ); + + auto handleBoundTuple = Build<TCoLambda>(ctx, filter.Pos()) + .Args({origBoundTupleArg}) + .Body<TExprList>() + .Add(std::move(newBoundTupleItems)) + .Build() + .Done(); + + return Build<TCoMap>(ctx, filter.Pos()) + .Input(filter) + .Lambda<TCoLambda>() + .Args({"boundTuple"}) + .Body<TExprList>() + .Add<TExprApplier>() + .Apply(handleBoundTuple) + .With<TCoNth>(0) + .Tuple("boundTuple") + .Index(ctx.NewAtom(filter.Pos(), 0)) + .Build() + .Build() + .Add<TExprApplier>() + .Apply(handleBoundTuple) + .With<TCoNth>(0) + .Tuple("boundTuple") + .Index(ctx.NewAtom(filter.Pos(), 1)) + .Build() + .Build() + .Build() + .Build() + .Done(); +} + TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExprBase node, TExprContext& ctx) const { if (node.Ref().HasResult() && node.Ref().GetResult().Type() != TExprNode::World) { return node; @@ -172,6 +226,9 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExp const auto kfColumns = GetKeyFilterColumns(section, kfType); YQL_ENSURE(!kfColumns.empty()); for (auto path: section.Paths()) { + TYtPathInfo pathInfo(path); + auto pathRowSpec = pathInfo.Table->RowSpec; + if (auto maybeOp = getInnerOpForUpdate(path, kfColumns)) { auto innerOp = maybeOp.Cast(); if (kfType == EYtSettingType::KeyFilter2) { @@ -203,10 +260,46 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::PushDownKeyExtract(TExp } auto innerOpSection = innerOp.Input().Item(0); - auto updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos()) - .InitFrom(innerOpSection) - .Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *NYql::KeepOnlySettings(section.Settings().Ref(), EYtSettingType::KeyFilter | EYtSettingType::KeyFilter2, ctx), ctx)) - .Done(); + TExprNode::TPtr updatedSection; + if (kfType == EYtSettingType::KeyFilter2 && State_->Configuration->DropUnusedKeysFromKeyFilter.Get().GetOrElse(DEFAULT_DROP_UNUSED_KEYS_FROM_KEY_FILTER)) { + for (auto innerOpPath: innerOpSection.Paths()) { + TYtPathInfo innerOpPathInfo(innerOpPath); + auto innerOpPathRowSpec = innerOpPathInfo.Table->RowSpec; + + YQL_ENSURE(kfColumns.size() <= innerOpPathRowSpec->SortedBy.size()); + for (size_t i = 0; i < kfColumns.size(); i++) { + YQL_ENSURE(innerOpPathRowSpec->SortedBy[i] == pathRowSpec->SortedBy[i]); + } + } + + TExprNode::TListType rebuiltKeyFilters; + for (auto filter : keyFilters) { + YQL_ENSURE(filter->ChildrenSize() == 2); + auto rebuiltFilter = RebuildKeyFilterAfterPushDown(TExprBase(filter->HeadPtr()), kfColumns.size(), ctx); + rebuiltKeyFilters.push_back(Build<TCoNameValueTuple>(ctx, innerOpSection.Settings().Pos()) + .Name().Build("keyFilter2") + .Value<TExprList>() + .Add(rebuiltFilter) + .Add(filter->Child(1)) + .Build() + .Done() + .Ptr() + ); + } + + updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos()) + .InitFrom(innerOpSection) + .Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *ctx.NewList(innerOpSection.Settings().Pos(), std::move(rebuiltKeyFilters)), ctx)) + .Done() + .Ptr(); + + } else { + updatedSection = Build<TYtSection>(ctx, innerOpSection.Pos()) + .InitFrom(innerOpSection) + .Settings(NYql::MergeSettings(innerOpSection.Settings().Ref(), *NYql::KeepOnlySettings(section.Settings().Ref(), EYtSettingType::KeyFilter | EYtSettingType::KeyFilter2, ctx), ctx)) + .Done() + .Ptr(); + } auto updatedSectionList = Build<TYtSectionList>(ctx, innerOp.Input().Pos()).Add(updatedSection).Done(); auto updatedInnerOp = ctx.ChangeChild(innerOp.Ref(), TYtTransientOpBase::idx_Input, updatedSectionList.Ptr()); diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt new file mode 100644 index 00000000000..4c10c06cf33 --- /dev/null +++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt @@ -0,0 +1,3 @@ +{"key1"="1";"key2"="aaa";"key3"=1}; +{"key1"="2";"key2"="aab";"key3"=2}; +{"key1"="3";"key2"=#;"key3"=3}; diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr new file mode 100644 index 00000000000..2fbe20d866b --- /dev/null +++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input1.txt.attr @@ -0,0 +1,65 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key1"; + [ + "DataType"; + "String"; + ]; + ]; + [ + "key2"; + [ + "OptionalType"; + [ + "DataType"; + "String"; + ]; + ]; + ]; + [ + "key3"; + [ + "DataType"; + "Int32"; + ]; + ]; + ]; + ]; + "SortDirections" = [ + 1; + 1; + 1; + ]; + "SortMembers" = [ + "key1"; + "key2"; + "key3"; + ]; + "SortedBy" = [ + "key1"; + "key2"; + "key3"; + ]; + "SortedByTypes" = [ + [ + "DataType"; + "String"; + ]; + [ + "OptionalType"; + [ + "DataType"; + "String"; + ]; + ]; + [ + "DataType"; + "Int32"; + ]; + ]; + } +} diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt new file mode 100644 index 00000000000..1b5d541c5ce --- /dev/null +++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt @@ -0,0 +1,3 @@ +{"key1"="4";"key2"="aad";}; +{"key1"="5";"key2"="aae";}; +{"key1"="6";"key2"="aaf";}; diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr new file mode 100644 index 00000000000..4e51119e466 --- /dev/null +++ b/yt/yql/tests/sql/suites/key_filter/yql-19420-input2.txt.attr @@ -0,0 +1,45 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key1"; + [ + "DataType"; + "String"; + ]; + ]; + [ + "key2"; + [ + "DataType"; + "String"; + ]; + ]; + ]; + ]; + "SortDirections" = [ + 1; + 1; + ]; + "SortMembers" = [ + "key1"; + "key2"; + ]; + "SortedBy" = [ + "key1"; + "key2"; + ]; + "SortedByTypes" = [ + [ + "DataType"; + "String"; + ]; + [ + "DataType"; + "String"; + ]; + ]; + } +} diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420.cfg b/yt/yql/tests/sql/suites/key_filter/yql-19420.cfg new file mode 100644 index 00000000000..ea8b3ac173a --- /dev/null +++ b/yt/yql/tests/sql/suites/key_filter/yql-19420.cfg @@ -0,0 +1,2 @@ +in Input1 yql-19420-input1.txt +in Input2 yql-19420-input2.txt diff --git a/yt/yql/tests/sql/suites/key_filter/yql-19420.sql b/yt/yql/tests/sql/suites/key_filter/yql-19420.sql new file mode 100644 index 00000000000..8b823a08865 --- /dev/null +++ b/yt/yql/tests/sql/suites/key_filter/yql-19420.sql @@ -0,0 +1,20 @@ +/* postgres can not */ +USE plato; + +PRAGMA yt.DropUnusedKeysFromKeyFilter="1"; + +$src = ( + SELECT + key1, + key2 + FROM RANGE("", "Input1", "Input2") +); + +SELECT + key2 +FROM $src +WHERE key1 BETWEEN '2' AND '5'; + +SELECT + key2 +FROM $src; |