diff options
author | ssmike <ssmike@ydb.tech> | 2023-07-11 22:49:42 +0300 |
---|---|---|
committer | ssmike <ssmike@ydb.tech> | 2023-07-11 22:49:42 +0300 |
commit | f7576173ed3e091de24c8e38f3412523005f9b3e (patch) | |
tree | 74e7158ad8aa0af99013b1d5aeae9894e4c31619 | |
parent | 4ea04c90b14d468bcdae5f98f2e1692f65285129 (diff) | |
download | ydb-f7576173ed3e091de24c8e38f3412523005f9b3e.tar.gz |
Prepare to remove fallback on kqp pushdown optimizer
17 files changed, 762 insertions, 119 deletions
diff --git a/ydb/core/kqp/common/kqp_yql.cpp b/ydb/core/kqp/common/kqp_yql.cpp index 32294b86b90..27443b43238 100644 --- a/ydb/core/kqp/common/kqp_yql.cpp +++ b/ydb/core/kqp/common/kqp_yql.cpp @@ -310,17 +310,25 @@ TCoNameValueTupleList TKqpReadTableExplainPrompt::BuildNode(TExprContext& ctx, T .Done() ); - if (!ExpectedMaxRanges.empty()) { + if (ExpectedMaxRanges) { prompt.emplace_back( Build<TCoNameValueTuple>(ctx, pos) .Name() .Build(ExpectedMaxRangesName) .Value<TCoAtom>() - .Build(ExpectedMaxRanges) + .Build(ToString(*ExpectedMaxRanges)) .Done() ); } + prompt.emplace_back( + Build<TCoNameValueTuple>(ctx, pos) + .Name() + .Build(PointPrefixLenName) + .Value<TCoAtom>() + .Build(ToString(PointPrefixLen)) + .Done()); + return Build<TCoNameValueTupleList>(ctx, pos) .Add(prompt) .Done(); @@ -345,8 +353,13 @@ TKqpReadTableExplainPrompt TKqpReadTableExplainPrompt::Parse(const NNodes::TCoNa } if (name == TKqpReadTableExplainPrompt::ExpectedMaxRangesName) { - prompt.ExpectedMaxRanges = TString(tuple.Value().template Cast<TCoAtom>()); - continue; + prompt.ExpectedMaxRanges = FromString<ui64>(TString(tuple.Value().template Cast<TCoAtom>())); + continue; + } + + if (name == TKqpReadTableExplainPrompt::PointPrefixLenName) { + prompt.PointPrefixLen = FromString<ui64>(TString(tuple.Value().template Cast<TCoAtom>())); + continue; } YQL_ENSURE(false, "Unknown KqpReadTableRanges explain prompt name '" << name << "'"); diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h index 293f98c90db..08636cbe1d6 100644 --- a/ydb/core/kqp/common/kqp_yql.h +++ b/ydb/core/kqp/common/kqp_yql.h @@ -85,16 +85,22 @@ struct TKqpUpsertRowsSettings { struct TKqpReadTableExplainPrompt { static constexpr TStringBuf UsedKeyColumnsName = "UsedKeyColumns"; static constexpr TStringBuf ExpectedMaxRangesName = "ExpectedMaxRanges"; + static constexpr TStringBuf PointPrefixLenName = "PointPrefixLen"; TVector<TString> UsedKeyColumns; - TString ExpectedMaxRanges; + TMaybe<ui64> ExpectedMaxRanges; + ui64 PointPrefixLen = 0; void SetUsedKeyColumns(TVector<TString> columns) { UsedKeyColumns = columns; } void SetExpectedMaxRanges(size_t count) { - ExpectedMaxRanges = ToString(count); + ExpectedMaxRanges = count; + } + + void SetPointPrefixLen(size_t len) { + PointPrefixLen = len; } NNodes::TCoNameValueTupleList BuildNode(TExprContext& ctx, TPositionHandle pos) const; diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 18b0f0b2ba9..2f3b744c0df 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -407,6 +407,7 @@ void ApplyServiceConfig(TKikimrConfiguration& kqpConfig, const TTableServiceConf kqpConfig.EnablePreparedDdl = serviceConfig.GetEnablePreparedDdl(); kqpConfig.EnableSequences = serviceConfig.GetEnableSequences(); kqpConfig.BindingsMode = RemapBindingsMode(serviceConfig.GetBindingsMode()); + kqpConfig.PredicateExtract20 = serviceConfig.GetPredicateExtract20(); } IActor* CreateKqpCompileActor(const TActorId& owner, const TKqpSettings::TConstPtr& kqpSettings, diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp index a81fe507e68..cc920e29952 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp @@ -370,6 +370,7 @@ private: bool enableKqpDataQueryPredicateExtract = Config.GetEnablePredicateExtractForDataQueries(); bool enableKqpScanQueryPredicateExtract = Config.GetEnablePredicateExtractForScanQueries(); + bool predicateExtract20 = Config.GetPredicateExtract20(); bool enableSequentialReads = Config.GetEnableSequentialReads(); bool defaultSyntaxVersion = Config.GetSqlVersion(); @@ -389,6 +390,7 @@ private: Config.GetEnableKqpScanQuerySourceRead() != enableKqpScanQuerySourceRead || Config.GetEnablePredicateExtractForDataQueries() != enableKqpDataQueryPredicateExtract || Config.GetEnablePredicateExtractForScanQueries() != enableKqpScanQueryPredicateExtract || + Config.GetPredicateExtract20() != predicateExtract20 || Config.GetEnableSequentialReads() != enableSequentialReads || Config.GetEnableKqpImmediateEffects() != enableKqpImmediateEffects) { diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index bb2b916b29e..622675b009f 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -102,7 +102,10 @@ { "Name": "TKqlReadTableRanges", "Base": "TKqlReadTableRangesBase", - "Match": {"Type": "Callable", "Name": "KqlReadTableRanges"} + "Match": {"Type": "Callable", "Name": "KqlReadTableRanges"}, + "Children": [ + {"Index": 5, "Name": "PrefixPointsExpr", "Type": "TExprBase", "Optional": true} + ] }, { "Name": "TKqpReadTableRanges", @@ -127,7 +130,8 @@ "Base": "TKqlReadTableRangesBase", "Match": {"Type": "Callable", "Name": "TKqlReadTableIndexRanges"}, "Children": [ - {"Index": 5, "Name": "Index", "Type": "TCoAtom"} + {"Index": 5, "Name": "Index", "Type": "TCoAtom"}, + {"Index": 6, "Name": "PrefixPointsExpr", "Type": "TExprBase", "Optional": true} ] }, { diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp index 9d34c2f4668..7211e6dbf32 100644 --- a/ydb/core/kqp/host/kqp_type_ann.cpp +++ b/ydb/core/kqp/host/kqp_type_ann.cpp @@ -303,7 +303,8 @@ TStatus AnnotateReadTableRanges(const TExprNode::TPtr& node, TExprContext& ctx, size_t argCount = (olapTable || index) ? 6 : 5; - if (!EnsureArgsCount(*node, argCount, ctx)) { + // prefix + if (!EnsureMinArgsCount(*node, argCount, ctx) && EnsureMaxArgsCount(*node, argCount + 1, ctx)) { return TStatus::Error; } diff --git a/ydb/core/kqp/opt/kqp_query_plan.cpp b/ydb/core/kqp/opt/kqp_query_plan.cpp index 7183872618d..b29c09596a2 100644 --- a/ydb/core/kqp/opt/kqp_query_plan.cpp +++ b/ydb/core/kqp/opt/kqp_query_plan.cpp @@ -795,9 +795,11 @@ private: } if (explainPrompt.ExpectedMaxRanges) { - op.Properties["ReadRangesExpectedSize"] = explainPrompt.ExpectedMaxRanges; + op.Properties["ReadRangesExpectedSize"] = ToString(*explainPrompt.ExpectedMaxRanges); } + op.Properties["ReadRangesPointPrefixLen"] = ToString(explainPrompt.PointPrefixLen); + auto& columns = op.Properties["ReadColumns"]; for (const auto& col : sourceSettings.Columns()) { readInfo.Columns.emplace_back(TString(col.Value())); @@ -1254,7 +1256,7 @@ private: } if (explainPrompt.ExpectedMaxRanges) { - op.Properties["ReadRangesExpectedSize"] = explainPrompt.ExpectedMaxRanges; + op.Properties["ReadRangesExpectedSize"] = *explainPrompt.ExpectedMaxRanges; } auto& columns = op.Properties["ReadColumns"]; diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 1e720360171..dafc1368488 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -28,6 +28,7 @@ public: { #define HNDL(name) "KqpLogical-"#name, Hndl(&TKqpLogicalOptTransformer::name) AddHandler(0, &TCoFlatMap::Match, HNDL(PushPredicateToReadTable)); + AddHandler(0, &TCoFlatMap::Match, HNDL(PushExtractedPredicateToReadTable)); AddHandler(0, &TCoAggregate::Match, HNDL(RewriteAggregate)); AddHandler(0, &TCoTake::Match, HNDL(RewriteTakeSortToTopSort)); AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInToEquiJoin)); @@ -36,7 +37,7 @@ public: AddHandler(0, &TDqJoin::Match, HNDL(JoinToIndexLookup)); AddHandler(0, &TCoCalcOverWindowBase::Match, HNDL(ExpandWindowFunctions)); AddHandler(0, &TCoCalcOverWindowGroup::Match, HNDL(ExpandWindowFunctions)); - AddHandler(0, &TCoFlatMap::Match, HNDL(PushExtractedPredicateToReadTable)); + AddHandler(0, &TCoFlatMap::Match, HNDL(LatePushExtractedPredicateToReadTable)); AddHandler(0, &TCoTop::Match, HNDL(RewriteTopSortOverIndexRead)); AddHandler(0, &TCoTopSort::Match, HNDL(RewriteTopSortOverIndexRead)); AddHandler(0, &TCoTake::Match, HNDL(RewriteTakeOverIndexRead)); @@ -69,19 +70,34 @@ public: } protected: + TMaybeNode<TExprBase> PushPredicateToReadTable(TExprBase node, TExprContext& ctx) { + if (KqpCtx.Config->PredicateExtract20) { + return node; + } + TExprBase output = KqpPushPredicateToReadTable(node, ctx, KqpCtx); + DumpAppliedRule("PushPredicateToReadTable", node.Ptr(), output.Ptr(), ctx); + return output; + } TMaybeNode<TExprBase> PushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx) { + if (!KqpCtx.Config->PredicateExtract20) { + return node; + } TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx); DumpAppliedRule("PushExtractedPredicateToReadTable", node.Ptr(), output.Ptr(), ctx); return output; } - TMaybeNode<TExprBase> PushPredicateToReadTable(TExprBase node, TExprContext& ctx) { - TExprBase output = KqpPushPredicateToReadTable(node, ctx, KqpCtx); - DumpAppliedRule("PushPredicateToReadTable", node.Ptr(), output.Ptr(), ctx); + TMaybeNode<TExprBase> LatePushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx) { + if (KqpCtx.Config->PredicateExtract20) { + return node; + } + TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx); + DumpAppliedRule("PushExtractedPredicateToReadTable", node.Ptr(), output.Ptr(), ctx); return output; } + TMaybeNode<TExprBase> RewriteAggregate(TExprBase node, TExprContext& ctx) { TExprBase output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown(), KqpCtx.Config->HasOptUseFinalizeByKey()); DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Ptr(), ctx); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp index f3619293894..38de2ebb3e9 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp @@ -187,79 +187,81 @@ TMaybeNode<TKqlKeyInc> GetRightTableKeyPrefix(const TKqlKeyRange& range) { return rangeFrom; } -TExprBase BuildLookupIndex(TExprContext& ctx, const TPositionHandle pos, const TKqlReadTableBase& read, +TExprBase BuildLookupIndex(TExprContext& ctx, const TPositionHandle pos, + const TKqpTable& table, const TCoAtomList& columns, const TExprBase& keysToLookup, const TVector<TCoAtom>& lookupNames, const TString& indexName, const TKqpOptimizeContext& kqpCtx) { if (kqpCtx.IsScanQuery()) { YQL_ENSURE(kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin, "Stream lookup is not enabled for index lookup join"); return Build<TKqlStreamLookupIndex>(ctx, pos) - .Table(read.Table()) + .Table(table) .LookupKeys<TCoSkipNullMembers>() .Input(keysToLookup) .Members() .Add(lookupNames) .Build() .Build() - .Columns(read.Columns()) + .Columns(columns) .Index() .Build(indexName) .Done(); } return Build<TKqlLookupIndex>(ctx, pos) - .Table(read.Table()) + .Table(table) .LookupKeys<TCoSkipNullMembers>() .Input(keysToLookup) .Members() .Add(lookupNames) .Build() .Build() - .Columns(read.Columns()) + .Columns(columns) .Index() .Build(indexName) .Done(); } -TExprBase BuildLookupTable(TExprContext& ctx, const TPositionHandle pos, const TKqlReadTableBase& read, +TExprBase BuildLookupTable(TExprContext& ctx, const TPositionHandle pos, + const TKqpTable& table, const TCoAtomList& columns, const TExprBase& keysToLookup, const TVector<TCoAtom>& lookupNames, const TKqpOptimizeContext& kqpCtx) { if (kqpCtx.IsScanQuery()) { YQL_ENSURE(kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin, "Stream lookup is not enabled for index lookup join"); return Build<TKqlStreamLookupTable>(ctx, pos) - .Table(read.Table()) + .Table(table) .LookupKeys<TCoSkipNullMembers>() .Input(keysToLookup) .Members() .Add(lookupNames) .Build() .Build() - .Columns(read.Columns()) + .Columns(columns) .Done(); } if (kqpCtx.Config->EnableKqpDataQueryStreamLookup) { return Build<TKqlStreamLookupTable>(ctx, pos) - .Table(read.Table()) + .Table(table) .LookupKeys<TCoSkipNullMembers>() .Input(keysToLookup) .Members() .Add(lookupNames) .Build() .Build() - .Columns(read.Columns()) + .Columns(columns) .Done(); } return Build<TKqlLookupTable>(ctx, pos) - .Table(read.Table()) + .Table(table) .LookupKeys<TCoSkipNullMembers>() .Input(keysToLookup) .Members() .Add(lookupNames) .Build() .Build() - .Columns(read.Columns()) + .Columns(columns) .Done(); } @@ -336,7 +338,10 @@ bool IsParameterToListOfStructsRepack(const TExprBase& expr) { //#define DBG(...) YQL_CLOG(DEBUG, ProviderKqp) << __VA_ARGS__ #define DBG(...) +template<typename ReadType> TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { + static_assert(std::is_same_v<ReadType, TKqlReadTableBase> || std::is_same_v<ReadType, TKqlReadTableRangesBase>, "unsupported read type"); + if (!join.RightLabel().Maybe<TCoAtom>()) { // Lookup only in tables return {}; @@ -350,64 +355,97 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext TString lookupTable; TString indexName; - auto rightReadMatch = MatchRead<TKqlReadTableBase>(join.RightInput()); - if (!rightReadMatch) { - if (auto readRangesMatch = MatchRead<TKqlReadTableRangesBase>(join.RightInput())) { - // for now only fullscans are supported - auto read = readRangesMatch->Read.Cast<TKqlReadTableRangesBase>(); - if (TCoVoid::Match(read.Ranges().Raw())) { - rightReadMatch = readRangesMatch; - rightReadMatch->Read = - Build<TKqlReadTable>(ctx, join.Pos()) - .Settings(read.Settings()) - .Table(read.Table()) - .Columns(read.Columns()) - .Range<TKqlKeyRange>() - .From<TKqlKeyInc>().Build() - .To<TKqlKeyInc>().Build() - .Build() - .Done(); - if (auto indexRead = read.Maybe<TKqlReadTableIndexRanges>()) { - const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue()); - lookupTable = indexMeta->Name; - indexName = indexRead.Cast().Index().StringValue(); - } - } else { - return {}; - } - } else { + auto rightReadMatch = MatchRead<ReadType>(join.RightInput()); + if (!rightReadMatch || rightReadMatch->FlatMap && !IsPassthroughFlatMap(rightReadMatch->FlatMap.Cast(), nullptr)) { + return {}; + } + + auto rightRead = rightReadMatch->Read.template Cast<ReadType>(); + + TMaybeNode<TCoAtomList> lookupColumns; + TMaybe<TKqlKeyInc> rightTableKeyPrefix; + if constexpr (std::is_same_v<ReadType, TKqlReadTableBase>) { + Y_ENSURE(rightRead.template Maybe<TKqlReadTable>() || rightRead.template Maybe<TKqlReadTableIndex>()); + const TKqlReadTableBase read = rightRead; + if (!read.Table().SysView().Value().empty()) { + // Can't lookup in system views return {}; } - } - if (rightReadMatch->FlatMap && !IsPassthroughFlatMap(rightReadMatch->FlatMap.Cast(), nullptr)) { - return {}; - } + auto maybeRightTableKeyPrefix = GetRightTableKeyPrefix(read.Range()); + if (!maybeRightTableKeyPrefix) { + return {}; + } - auto rightRead = rightReadMatch->Read.Cast<TKqlReadTableBase>(); + lookupColumns = read.Columns(); + rightTableKeyPrefix = maybeRightTableKeyPrefix.Cast(); - Y_ENSURE(rightRead.Maybe<TKqlReadTable>() || rightRead.Maybe<TKqlReadTableIndex>()); + if (auto indexRead = rightRead.template Maybe<TKqlReadTableIndex>()) { + indexName = indexRead.Cast().Index().StringValue(); + lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name; + } else { + lookupTable = read.Table().Path().StringValue(); + } + } else if constexpr (std::is_same_v<ReadType, TKqlReadTableRangesBase>){ + auto read = rightReadMatch->Read.template Cast<TKqlReadTableRangesBase>(); + lookupColumns = read.Columns(); + + if (auto indexRead = read.template Maybe<TKqlReadTableIndexRanges>()) { + const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path()); + const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue()); + lookupTable = indexMeta->Name; + indexName = indexRead.Cast().Index().StringValue(); + } else { + lookupTable = read.Table().Path().StringValue(); + } - const TKqlReadTableBase read = rightRead; - if (!read.Table().SysView().Value().empty()) { - // Can't lookup in system views - return {}; - } + const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); - auto maybeRightTableKeyPrefix = GetRightTableKeyPrefix(read.Range()); - if (!maybeRightTableKeyPrefix) { - return {}; - } - auto rightTableKeyPrefix = maybeRightTableKeyPrefix.Cast(); + if (TCoVoid::Match(read.Ranges().Raw())) { + rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos()).Done(); + } else { + auto prompt = TKqpReadTableExplainPrompt::Parse(read); + if (prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) { + return {}; + } + + TMaybeNode<TExprBase> row; + if (read.template Maybe<TKqlReadTableRanges>()) { + row = read.template Cast<TKqlReadTableRanges>().PrefixPointsExpr(); + } + if (rightRead.template Maybe<TKqlReadTableIndexRanges>()) { + row = read.template Cast<TKqlReadTableIndexRanges>().PrefixPointsExpr(); + } + if (!row.IsValid()) { + return {}; + } + row = Build<TCoHead>(ctx, read.Ranges().Pos()).Input(row.Cast()).Done(); - if (auto indexRead = rightRead.Maybe<TKqlReadTableIndex>()) { - indexName = indexRead.Cast().Index().StringValue(); - lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name; - } else if (!indexName) { - lookupTable = read.Table().Path().StringValue(); + size_t prefixLen = prompt.PointPrefixLen; + TVector<TString> keyColumns; + for (size_t i = 0; i < prefixLen; ++i) { + YQL_ENSURE(i < rightTableDesc.Metadata->KeyColumnNames.size()); + keyColumns.push_back(rightTableDesc.Metadata->KeyColumnNames[i]); + } + + TVector<TExprBase> components; + for (auto column : keyColumns) { + TCoAtom columnAtom(ctx.NewAtom(read.Ranges().Pos(), column)); + components.push_back( + Build<TCoMember>(ctx, read.Ranges().Pos()) + .Struct(row.Cast()) + .Name(columnAtom) + .Done()); + } + + rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos()) + .Add(components) + .Done(); + } } + Y_ENSURE(rightTableKeyPrefix); + const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable); TMap<std::string_view, TString> rightJoinKeyToLeft; @@ -446,12 +484,12 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext auto leftColumn = rightJoinKeyToLeft.FindPtr(rightColumnName); - if (fixedPrefix < rightTableKeyPrefix.ArgCount()) { + if (fixedPrefix < rightTableKeyPrefix->ArgCount()) { if (leftColumn) { return {}; } - member = rightTableKeyPrefix.Arg(fixedPrefix).Ptr(); + member = rightTableKeyPrefix->Arg(fixedPrefix).Ptr(); fixedPrefix++; } else { if (!leftColumn) { @@ -562,8 +600,8 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext .Done(); TExprBase lookup = indexName - ? BuildLookupIndex(ctx, join.Pos(), read, keysToLookup, lookupNames, indexName, kqpCtx) - : BuildLookupTable(ctx, join.Pos(), read, keysToLookup, lookupNames, kqpCtx); + ? BuildLookupIndex(ctx, join.Pos(), rightRead.Table(), rightRead.Columns(), keysToLookup, lookupNames, indexName, kqpCtx) + : BuildLookupTable(ctx, join.Pos(), rightRead.Table(), rightRead.Columns(), keysToLookup, lookupNames, kqpCtx); // Skip null keys in lookup part as for equijoin semantics null != null, // so we can't have nulls in lookup part @@ -574,7 +612,6 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext .Build() .Done(); - auto lookupColumns = read.Columns(); if (rightReadMatch->ExtractMembers) { lookupColumns = rightReadMatch->ExtractMembers.Cast().Members(); } @@ -585,7 +622,7 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext auto arg = TCoArgument(ctx.NewArgument(join.Pos(), "row")); auto rightLabel = join.RightLabel().Cast<TCoAtom>().Value(); - TVector<TExprBase> renames = CreateRenames(rightReadMatch->FlatMap, lookupColumns, arg, rightLabel, + TVector<TExprBase> renames = CreateRenames(rightReadMatch->FlatMap, lookupColumns.Cast(), arg, rightLabel, join.Pos(), ctx); lookup = Build<TCoMap>(ctx, join.Pos()) @@ -628,12 +665,16 @@ TExprBase KqpJoinToIndexLookup(const TExprBase& node, TExprContext& ctx, const T auto flipJoin = FlipLeftSemiJoin(join, ctx); DBG("-- Flip join"); - if (auto indexLookupJoin = KqpJoinToIndexLookupImpl(flipJoin, ctx, kqpCtx)) { + if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableBase>(flipJoin, ctx, kqpCtx)) { + return indexLookupJoin.Cast(); + } else if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableRangesBase>(flipJoin, ctx, kqpCtx)) { return indexLookupJoin.Cast(); } } - if (auto indexLookupJoin = KqpJoinToIndexLookupImpl(join, ctx, kqpCtx)) { + if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableBase>(join, ctx, kqpCtx)) { + return indexLookupJoin.Cast(); + } else if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableRangesBase>(join, ctx, kqpCtx)) { return indexLookupJoin.Cast(); } diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index fc9cc2c70f6..1d7a1f8a2b4 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -10,6 +10,7 @@ #include <ydb/library/yql/providers/common/provider/yql_table_lookup.h> #include <ydb/library/yql/core/extract_predicate/extract_predicate.h> + namespace NKikimr::NKqp::NOpt { using namespace NYql; @@ -150,6 +151,19 @@ TMaybeNode<TExprBase> TryBuildTrivialReadTable(TCoFlatMap& flatmap, TKqlReadTabl .Done(); } +TMaybe<size_t> EstimateSqlInCollectionSize(const NYql::TExprNode::TPtr& collection) { + NYql::TExprNode::TPtr curr = collection; + if (curr->IsCallable("Just")) { + curr = curr->HeadPtr(); + } + + if (curr->GetTypeAnn()->GetKind() == NYql::ETypeAnnotationKind::Tuple || curr->IsCallable({"AsList", "AsDict", "AsSet"})) { + return std::max<size_t>(curr->ChildrenSize(), 1); + } + + return {}; +} + } // namespace TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -207,28 +221,62 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx const auto& mainTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); auto& tableDesc = indexName ? kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(indexName.Cast())).first->Name) : mainTableDesc; - // test for trivial cases (explicit literals or parameters) - if (auto expr = TryBuildTrivialReadTable(flatmap, read, *readMatch, tableDesc, ctx, kqpCtx, indexName)) { - return expr.Cast(); - } - THashSet<TString> possibleKeys; TPredicateExtractorSettings settings; settings.MergeAdjacentPointRanges = true; settings.HaveNextValueCallable = true; + settings.BuildLiteralRange = true; + + if (!kqpCtx.Config->PredicateExtract20) { + // test for trivial cases (explicit literals or parameters) + if (auto expr = TryBuildTrivialReadTable(flatmap, read, *readMatch, tableDesc, ctx, kqpCtx, indexName)) { + return expr.Cast(); + } + } else { + settings.IsValidForRange = [&] (const TExprNode::TPtr& node) -> bool { + TExprBase expr(node); + if (auto sqlin = expr.Maybe<TCoSqlIn>()) { + if (!EstimateSqlInCollectionSize(sqlin.Cast().Collection().Ptr())) { + return false; + } + } + + return true; + }; + } + auto extractor = MakePredicateRangeExtractor(settings); YQL_ENSURE(tableDesc.SchemeNode); bool prepareSuccess = extractor->Prepare(flatmap.Lambda().Ptr(), *mainTableDesc.SchemeNode, possibleKeys, ctx, typesCtx); YQL_ENSURE(prepareSuccess); - auto buildResult = extractor->BuildComputeNode(tableDesc.Metadata->KeyColumnNames, ctx); + auto buildResult = extractor->BuildComputeNode(tableDesc.Metadata->KeyColumnNames, ctx, typesCtx); + TExprNode::TPtr ranges = buildResult.ComputeNode; if (!ranges) { return node; } + TExprNode::TPtr prefixPointsExpr; + IPredicateRangeExtractor::TBuildResult pointsExtractionResult; + + if (buildResult.PointPrefixLen > 0 && buildResult.ExpectedMaxRanges) { + TPredicateExtractorSettings pointSettings = settings; + pointSettings.MergeAdjacentPointRanges = false; + pointSettings.HaveNextValueCallable = false; + TVector<TString> pointKeys; + for (size_t i = 0; i < buildResult.PointPrefixLen; ++i) { + pointKeys.push_back(tableDesc.Metadata->KeyColumnNames[i]); + } + auto extractor = MakePredicateRangeExtractor(pointSettings); + YQL_ENSURE(extractor->Prepare(flatmap.Lambda().Ptr(), *mainTableDesc.SchemeNode, possibleKeys, ctx, typesCtx)); + + pointsExtractionResult = extractor->BuildComputeNode(pointKeys, ctx, typesCtx); + prefixPointsExpr = BuildPointsList(pointsExtractionResult, pointKeys, ctx); + } + TExprNode::TPtr residualLambda = buildResult.PrunedLambda; TVector<TString> usedColumns; @@ -248,23 +296,123 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx YQL_CLOG(DEBUG, ProviderKqp) << "Residual lambda: " << KqpExprToPrettyString(*residualLambda, ctx); TMaybe<TExprBase> input; - if (indexName) { - input = Build<TKqlReadTableIndexRanges>(ctx, read.Pos()) - .Table(read.Table()) - .Ranges(ranges) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(prompt.BuildNode(ctx, read.Pos())) - .Index(indexName.Cast()) - .Done(); - } else { - input = Build<TKqlReadTableRanges>(ctx, read.Pos()) - .Table(read.Table()) - .Ranges(ranges) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(prompt.BuildNode(ctx, read.Pos())) - .Done(); + if (kqpCtx.Config->PredicateExtract20 && + (tableDesc.Metadata->Kind == EKikimrTableKind::Datashard || + tableDesc.Metadata->Kind == EKikimrTableKind::SysView)) + { + auto buildLookup = [&] (TExprNode::TPtr keys, TMaybe<TExprBase>& result) { + if (indexName) { + if (kqpCtx.IsDataQuery()) { + result = Build<TKqlLookupIndex>(ctx, node.Pos()) + .Table(read.Table()) + .Columns(read.Columns()) + .LookupKeys(keys) + .Index(indexName.Cast()) + .Done(); + } else if (kqpCtx.IsScanQuery() && kqpCtx.Config->EnableKqpScanQueryStreamLookup) { + result = Build<TKqlStreamLookupIndex>(ctx, node.Pos()) + .Table(read.Table()) + .Columns(read.Columns()) + .LookupKeys(keys) + .Index(indexName.Cast()) + .LookupKeys(keys) + .Done(); + } + } else if (kqpCtx.IsDataQuery()) { + result = Build<TKqlLookupTable>(ctx, node.Pos()) + .Table(read.Table()) + .Columns(read.Columns()) + .LookupKeys(keys) + .Done(); + } else if (kqpCtx.IsScanQuery() && kqpCtx.Config->EnableKqpScanQueryStreamLookup) { + result = Build<TKqlStreamLookupTable>(ctx, node.Pos()) + .Table(read.Table()) + .Columns(read.Columns()) + .LookupKeys(keys) + .Done(); + } + }; + + if (buildResult.LiteralRange) { + bool ispoint = buildResult.PointPrefixLen == tableDesc.Metadata->KeyColumnNames.size(); + if (ispoint) { + TVector<TExprBase> structMembers; + for (size_t i = 0; i < tableDesc.Metadata->KeyColumnNames.size(); ++i) { + auto member = Build<TCoNameValueTuple>(ctx, node.Pos()) + .Name().Build(tableDesc.Metadata->KeyColumnNames[i]) + .Value(buildResult.LiteralRange->Left.Columns[i]) + .Done(); + + structMembers.push_back(member); + } + TExprBase keys = Build<TCoAsList>(ctx, node.Pos()) + .Add<TCoAsStruct>() + .Add(structMembers) + .Build() + .Done(); + + buildLookup(keys.Ptr(), input); + } else { + auto fromExpr = buildResult.LiteralRange->Left.Inclusive + ? Build<TKqlKeyInc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Left.Columns).Done().Cast<TKqlKeyTuple>() + : Build<TKqlKeyExc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Left.Columns).Done().Cast<TKqlKeyTuple>(); + + auto toExpr = buildResult.LiteralRange->Right.Inclusive + ? Build<TKqlKeyInc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Right.Columns).Done().Cast<TKqlKeyTuple>() + : Build<TKqlKeyExc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Right.Columns).Done().Cast<TKqlKeyTuple>(); + + auto keyRange = Build<TKqlKeyRange>(ctx, read.Pos()) + .From(fromExpr) + .To(toExpr) + .Done(); + + if (indexName) { + input = Build<TKqlReadTableIndex>(ctx, read.Pos()) + .Table(read.Table()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .Range(keyRange) + .Index(indexName.Cast()) + .Done(); + } else { + input = Build<TKqlReadTable>(ctx, read.Pos()) + .Table(read.Table()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .Range(keyRange) + .Done(); + } + } + } else if (buildResult.PointPrefixLen == tableDesc.Metadata->KeyColumnNames.size()) { + YQL_ENSURE(prefixPointsExpr); + residualLambda = pointsExtractionResult.PrunedLambda; + buildLookup(prefixPointsExpr, input); + } + } + + if (!input) { + TMaybeNode<TExprBase> prefix = prefixPointsExpr; + + if (indexName) { + input = Build<TKqlReadTableIndexRanges>(ctx, read.Pos()) + .Table(read.Table()) + .Ranges(ranges) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(prompt.BuildNode(ctx, read.Pos())) + .Index(indexName.Cast()) + .PrefixPointsExpr(prefix) + .Done(); + } else { + input = Build<TKqlReadTableRanges>(ctx, read.Pos()) + .Table(read.Table()) + .Ranges(ranges) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(prompt.BuildNode(ctx, read.Pos())) + .PrefixPointsExpr(prefix) + .Done(); + } } *input = readMatch->BuildProcessNodes(*input, ctx); diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h index 91f1135a075..aabf39f8170 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -141,6 +141,7 @@ struct TKikimrConfiguration : public TKikimrSettings, public NCommon::TSettingDi bool EnableKqpScanQueryStreamIdxLookupJoin = false; bool EnablePredicateExtractForScanQuery = true; bool EnablePredicateExtractForDataQuery = false; + bool PredicateExtract20 = false; bool EnableKqpImmediateEffects = false; bool EnableSequentialReads = false; bool EnablePreparedDdl = false; diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 2b952a6b630..8c10d52ef8f 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1320,6 +1320,7 @@ message TTableServiceConfig { optional EBindingsMode BindingsMode = 40 [default = BM_ENABLED]; optional TIteratorReadsRetrySettings IteratorReadsRetrySettings = 41; + optional bool PredicateExtract20 = 44 [default = false]; }; // Config describes immediate controls and allows diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate.h b/ydb/library/yql/core/extract_predicate/extract_predicate.h index e4509474239..83fc16110d3 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate.h +++ b/ydb/library/yql/core/extract_predicate/extract_predicate.h @@ -9,6 +9,8 @@ struct TPredicateExtractorSettings { size_t MaxRanges = 10000; bool MergeAdjacentPointRanges = true; bool HaveNextValueCallable = false; + bool BuildLiteralRange = false; + std::function<bool(const NYql::TExprNode::TPtr&)> IsValidForRange; }; class IPredicateRangeExtractor { @@ -24,13 +26,28 @@ public: size_t UsedPrefixLen = 0; size_t PointPrefixLen = 0; TMaybe<size_t> ExpectedMaxRanges; + + struct TLiteralRange { + struct TLiteralRangeBound { + bool Inclusive = false; + TVector<TExprNode::TPtr> Columns; + }; + + TLiteralRangeBound Left; + TLiteralRangeBound Right; + }; + + TMaybe<TLiteralRange> LiteralRange; }; - virtual TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx) const = 0; + virtual TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx, TTypeAnnotationContext& typesCtx) const = 0; virtual ~IPredicateRangeExtractor() = default; }; IPredicateRangeExtractor::TPtr MakePredicateRangeExtractor(const TPredicateExtractorSettings& settings = {}); + +TExprNode::TPtr BuildPointsList(const IPredicateRangeExtractor::TBuildResult&, TConstArrayRef<TString> keyColumns, NYql::TExprContext& expCtx); + } diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp index 67512884ec0..67726a54bbb 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp @@ -43,7 +43,7 @@ TExprNode::TPtr ExpandRangeComputeFor(const TExprNode::TPtr& node, TExprContext& } - auto buildResult = extractor->BuildComputeNode(indexKeys, ctx); + auto buildResult = extractor->BuildComputeNode(indexKeys, ctx, *typesCtx); if (!buildResult.ComputeNode) { YQL_CLOG(DEBUG, Core) << "BuildComputeNode: ranges can not be built for predicate"; return result; diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp index d5c0c5a2915..40ef3047238 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp @@ -233,6 +233,9 @@ const THashMap<TStringBuf, TStringBuf> SupportedBinOps = { }; bool IsValidForRange(TExprNode::TPtr& node, const TExprNode& row, const TPredicateExtractorSettings& settings, TExprContext& ctx) { + if (settings.IsValidForRange && !settings.IsValidForRange(node)) { + return false; + } auto it = SupportedBinOps.find(node->Content()); if (it != SupportedBinOps.end()) { if (IsValidForRange(node->Head(), &node->Tail(), row)) { @@ -1340,10 +1343,268 @@ TExprNode::TPtr BuildRangeMultiply(TPositionHandle pos, size_t maxRanges, const return ctx.NewCallable(pos, "RangeMultiply", std::move(args)); } +using TRangeHint = IPredicateRangeExtractor::TBuildResult::TLiteralRange; +using TRangeBoundHint = IPredicateRangeExtractor::TBuildResult::TLiteralRange::TLiteralRangeBound; + +TMaybe<int> TryCompareColumns(const TExprNode::TPtr& fs, const TExprNode::TPtr& sc) { + if (!fs || !sc) { + return {}; + } + if (fs == sc) { + return 0; + } + + auto isNull = [](const TExprNode::TPtr& ptr) { + return ptr->IsCallable("Nothing") || (ptr->GetTypeAnn() + && ptr->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Null); + }; + + if (isNull(fs)) { + if (isNull(sc)) { + return 0; + } else { + return -1; + } + } + if (isNull(sc)) { + return 1; + } + + return {}; +} + +TMaybe<TRangeBoundHint> CompareBounds( + const TRangeBoundHint& hint1, + const TRangeBoundHint& hint2, + bool min, bool lefts) +{ + TRangeBoundHint hint; + bool uniteAreas = min == lefts; + for (size_t i = 0; ; i++) { + if (i >= hint1.Columns.size()) { + if (i >= hint2.Columns.size()) { + hint = hint1; + if (uniteAreas) { + hint.Inclusive = hint1.Inclusive || hint2.Inclusive; + } else { + hint.Inclusive = hint1.Inclusive && hint2.Inclusive; + } + } else if (hint1.Inclusive != uniteAreas) { + hint = hint2; + } else { + hint = hint1; + } + break; + } + if (i >= hint2.Columns.size()) { + if (hint2.Inclusive != uniteAreas) { + hint = hint1; + } else { + hint = hint2; + } + break; + } + + if (!hint1.Columns[i] || !hint2.Columns[i]) { + return Nothing(); + } + if (auto cmp = TryCompareColumns(hint1.Columns[i], hint2.Columns[i])) { + if ((cmp < 0) == min) { + hint = hint1; + } else { + hint = hint2; + } + + if (cmp != 0) { + break; + } + } else { + return Nothing(); + } + } + + return hint; +} + +TMaybe<TRangeHint> RangeHintIntersect(const TRangeHint& hint1, const TRangeHint& hint2) { + auto left = CompareBounds(hint1.Left, hint2.Left, /* min */ false, true); + auto right = CompareBounds(hint1.Right, hint2.Right, /* min */ true, false); + if (left && right) { + return TRangeHint{.Left = std::move(*left), .Right = std::move(*right)}; + } else { + return Nothing(); + } +} + +TMaybe<TRangeHint> RangeHintIntersect(const TMaybe<TRangeHint>& hint1, const TMaybe<TRangeHint>& hint2) { + if (hint1 && hint2) { + return RangeHintIntersect(*hint1, *hint2); + } else { + return {}; + } +} + + +TRangeHint RangeHintExtend(const TRangeHint& hint1, size_t hint1Len, const TRangeHint& hint2) { + TRangeHint hint = hint1; + if (hint.Left.Columns.size() == hint1Len && hint1.Left.Inclusive) { + hint.Left.Columns.insert(hint.Left.Columns.end(), hint2.Left.Columns.begin(), hint2.Left.Columns.end()); + hint.Left.Inclusive = hint2.Left.Inclusive; + } + if (hint.Right.Columns.size() == hint1Len && hint1.Right.Inclusive) { + hint.Right.Columns.insert(hint.Right.Columns.end(), hint2.Right.Columns.begin(), hint2.Right.Columns.end()); + hint.Right.Inclusive = hint2.Right.Inclusive; + } + return hint; +} + +TMaybe<TRangeHint> RangeHintExtend(const TMaybe<TRangeHint>& hint1, size_t hint1Len, const TMaybe<TRangeHint>& hint2) { + if (hint1 && hint2) { + return RangeHintExtend(*hint1, hint1Len, *hint2); + } else { + return {}; + } +} + +bool IsValid(const TRangeBoundHint& left, const TRangeBoundHint& right, bool acceptExclusivePoint = true) { + for (size_t i = 0; ; ++i) { + if (i >= left.Columns.size() || i >= right.Columns.size()) { + // ok, we have +-inf and sure that it's valid + return true; + } + auto cmp = TryCompareColumns(left.Columns[i], right.Columns[i]); + if (!cmp) { + return false; + } else { + if (*cmp < 0) { + return true; + } else if (*cmp > 0) { + return false; + } + } + } + return acceptExclusivePoint || left.Inclusive || right.Inclusive; +} + +TMaybe<TRangeHint> RangeHintUnion(const TRangeHint& hint1, const TRangeHint& hint2) { + if (!IsValid(hint1.Left, hint1.Right) || !IsValid(hint2.Left, hint2.Right)) { + return Nothing(); + } + + auto left = CompareBounds(hint1.Left, hint2.Left, /* min */ true, true); + auto right = CompareBounds(hint1.Right, hint2.Right, /* min */ false, false); + auto intersection = RangeHintIntersect(hint1, hint2); + if (!left || !right || !intersection) { + return Nothing(); + } + if (IsValid(intersection->Left, intersection->Right, false)) { + return TRangeHint{.Left = std::move(*left), .Right = std::move(*right)}; + } else { + return Nothing(); + } +} + +TMaybe<TRangeHint> RangeHintUnion(const TMaybe<TRangeHint>& hint1, const TMaybe<TRangeHint>& hint2) { + if (hint1 && hint2) { + return RangeHintUnion(*hint1, *hint2); + } else { + return {}; + } +} + +void TryBuildSingleRangeHint(TExprNode::TPtr range, const TStructExprType& rowType, const TVector<TString>& indexKeys, TIndexRange indexRange, TMaybe<TRangeHint>& hint, TExprContext& ctx) { + bool negated; + auto op = GetOpFromRange(*range, negated); + size_t rangeLen = indexRange.End - indexRange.Begin; + + auto idx = rowType.FindItem(indexKeys[indexRange.Begin]); + YQL_ENSURE(idx); + const TTypeAnnotationNode* firstKeyType = rowType.GetItems()[*idx]->GetItemType(); + + auto isOptional = [&](const TExprNode::TPtr& node) { + YQL_ENSURE(node->GetTypeAnn()); + return node->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional || node->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Null; + }; + + if (op->IsCallable("SqlIn") && !negated) { + TCoSqlIn sqlIn(op); + auto collection = sqlIn.Collection(); + if ((collection.Ptr()->IsCallable({"AsList", "AsSet", "Just"}) || + collection.Ptr()->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Tuple) && + GetSqlInCollectionSize(collection.Ptr()) == TMaybe<size_t>(1)) + { + auto item = sqlIn.Collection().Ptr()->Child(0); + if (isOptional(item)) { + return; + } + + hint.ConstructInPlace(); + hint->Left.Inclusive = hint->Right.Inclusive = true; + hint->Left.Columns = hint->Right.Columns = {item}; + } + } else if (op->IsCallable(">") || op->IsCallable(">=")) { + YQL_ENSURE(!negated); + if (isOptional(op->ChildPtr(1))) { + return; + } + + hint.ConstructInPlace(); + hint->Left.Inclusive = op->IsCallable(">="); + hint->Right.Inclusive = true; + YQL_ENSURE(rangeLen == 1); + hint->Left.Columns.push_back(op->ChildPtr(1)); + } else if (op->IsCallable("<") || op->IsCallable("<=")) { + YQL_ENSURE(!negated); + if (isOptional(op->ChildPtr(1))) { + return; + } + + hint.ConstructInPlace(); + hint->Right.Inclusive = op->IsCallable("<="); + + YQL_ENSURE(rangeLen == 1); + hint->Right.Columns.push_back(op->ChildPtr(1)); + + if (firstKeyType->GetKind() == ETypeAnnotationKind::Optional) { + auto none = Build<TCoNothing>(ctx, op->Pos()) + .OptionalType(ExpandType(op->Pos(), *firstKeyType, ctx)) + .Done(); + hint->Left.Columns.push_back(none.Ptr()); + hint->Left.Inclusive = false; + } else { + hint->Left.Inclusive = true; + } + } else if (op->IsCallable("==")) { + YQL_ENSURE(!negated); + if (isOptional(op->ChildPtr(1))) { + return; + } + + hint.ConstructInPlace(); + hint->Left.Inclusive = hint->Right.Inclusive = true; + hint->Left.Columns = hint->Right.Columns = {op->ChildPtr(1)}; + } else if (op->IsCallable("Exists")) { + YQL_ENSURE(rangeLen == 1); + hint.ConstructInPlace(); + auto none = Build<TCoNothing>(ctx, op->Pos()) + .OptionalType(ExpandType(op->Pos(), *firstKeyType, ctx)) + .Done(); + if (negated) { + hint->Left.Inclusive = hint->Right.Inclusive = true; + hint->Left.Columns.push_back(none.Ptr()); + hint->Right.Columns.push_back(none.Ptr()); + } else { + hint->Left.Inclusive = false; + hint->Left.Columns.push_back(none.Ptr()); + hint->Right.Inclusive = true; + } + } +} + TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, const TExprNode::TPtr& range, const TVector<TString>& indexKeys, const THashMap<TString, size_t>& indexKeysOrder, TExprNode::TPtr& prunedRange, TIndexRange& resultIndexRange, const TPredicateExtractorSettings& settings, - size_t usedPrefixLen, TExprContext& ctx) + size_t usedPrefixLen, TExprContext& ctx, TMaybe<TRangeHint>& hint) { prunedRange = {}; resultIndexRange = {}; @@ -1357,6 +1618,9 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co prunedRange = (rawCols.size() == cols.size()) ? BuildRestTrue(pos, rowType, ctx) : RebuildAsRangeRest(rowType, *range, ctx); + if (settings.BuildLiteralRange) { + TryBuildSingleRangeHint(range, rowType, indexKeys, resultIndexRange, hint, ctx); + } YQL_ENSURE(usedPrefixLen > 0 && usedPrefixLen <= indexKeys.size()); return BuildSingleComputeRange(rowType, *range, indexKeysOrder, settings, indexKeys[usedPrefixLen - 1], ctx); } @@ -1389,15 +1653,18 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co for (auto& child : range->ChildrenList()) { prunedOutput.emplace_back(); TIndexRange childIndexRange; - output.push_back(DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx)); + TMaybe<TRangeHint> childHint; + output.push_back(DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx, childHint)); childIndexRanges.push_back(childIndexRange); YQL_ENSURE(!childIndexRange.IsEmpty()); if (resultIndexRange.IsEmpty()) { resultIndexRange = childIndexRange; + hint = childHint; } else { YQL_ENSURE(childIndexRange.Begin == resultIndexRange.Begin); resultIndexRange.End = std::max(resultIndexRange.End, childIndexRange.End); resultIndexRange.PointPrefixLen = std::min(resultIndexRange.PointPrefixLen, childIndexRange.PointPrefixLen); + hint = RangeHintUnion(childHint, hint); } } @@ -1414,7 +1681,8 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co for (const auto& child : range->ChildrenList()) { prunedOutput.emplace_back(); TIndexRange childIndexRange; - auto compute = DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx); + TMaybe<TRangeHint> childHint; + auto compute = DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx, childHint); if (!compute) { continue; } @@ -1423,9 +1691,11 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co YQL_ENSURE(!childIndexRange.IsEmpty()); if (resultIndexRange.IsEmpty()) { resultIndexRange = childIndexRange; + hint = childHint; } else { if (childIndexRange.Begin != resultIndexRange.Begin) { YQL_ENSURE(childIndexRange.Begin == resultIndexRange.End); + hint = RangeHintExtend(hint, resultIndexRange.End - resultIndexRange.Begin, childHint); needAlign = false; if (!resultIndexRange.IsPoint()) { prunedOutput.back() = RebuildAsRangeRest(rowType, *child, ctx); @@ -1434,6 +1704,7 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co } } else { resultIndexRange.PointPrefixLen = std::max(resultIndexRange.PointPrefixLen, childIndexRange.PointPrefixLen); + hint = RangeHintIntersect(hint, childHint); } resultIndexRange.End = std::max(resultIndexRange.End, childIndexRange.End); } @@ -1456,7 +1727,7 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co prunedOutput.erase( std::remove_if(prunedOutput.begin(), prunedOutput.end(), [](const auto& pruned) { return IsRestTrue(*pruned); }), prunedOutput.end() - ); + ); if (prunedOutput.empty()) { prunedRange = BuildRestTrue(pos, rowType, ctx); @@ -1469,12 +1740,70 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co return ctx.NewCallable(pos, range->IsCallable("RangeOr") ? "RangeUnion" : "RangeIntersect", std::move(output)); } +void NormalizeRangeHint(TMaybe<TRangeHint>& hint, const TVector<TString>& indexKeys, const TStructExprType& rowType, TExprContext& ctx, TTypeAnnotationContext& types) { + if (!hint) { + return; + } + + auto normTypes = [&] (TRangeBoundHint& hint) { + for (size_t i = 0; i < hint.Columns.size(); ++i) { + auto idx = rowType.FindItem(indexKeys[i]); + YQL_ENSURE(idx); + const TTypeAnnotationNode* columnType = rowType.GetItems()[*idx]->GetItemType(); + const TTypeAnnotationNode* unwrapOptional = columnType; + + if (columnType->GetKind() == ETypeAnnotationKind::Optional) { + unwrapOptional = columnType->Cast<TOptionalExprType>()->GetItemType(); + } + + TTransformationPipeline pipeline(&types); + pipeline.AddServiceTransformers(); + pipeline.AddTypeAnnotationTransformer(); + pipeline.Add(CreateFunctorTransformer( + [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus { + output = input; + + auto status = TrySilentConvertTo(output, *unwrapOptional, ctx); + if (status == IGraphTransformer::TStatus::Error) { + output = input; + status = TrySilentConvertTo(output, *columnType, ctx); + } + + if (status == IGraphTransformer::TStatus::Repeat) { + status.HasRestart = 1; + } + return status; + } + ), "ExtractPredicate", TIssuesIds::CORE_EXEC); + + auto transformer = pipeline.BuildWithNoArgChecks(true); + + for (;;) { + auto status = InstantTransform(*transformer, hint.Columns[i], ctx, true); + if (status == IGraphTransformer::TStatus::Ok) { + break; + } + if (status == IGraphTransformer::TStatus::Error) { + return false; + } + } + } + return true; + }; + + if (!normTypes(hint->Left) || !normTypes(hint->Right)) { + hint.Clear(); + } +} + TExprNode::TPtr BuildMultiColumnComputeNode(const TStructExprType& rowType, const TExprNode::TPtr& range, const TVector<TString>& indexKeys, const THashMap<TString, size_t>& indexKeysOrder, - TExprNode::TPtr& prunedRange, const TPredicateExtractorSettings& settings, size_t usedPrefixLen, size_t& pointPrefixLen, TExprContext& ctx) + TExprNode::TPtr& prunedRange, const TPredicateExtractorSettings& settings, size_t usedPrefixLen, size_t& pointPrefixLen, + TExprContext& ctx, TTypeAnnotationContext& types, TMaybe<TRangeHint>& resultHint) { TIndexRange resultIndexRange; - auto result = DoBuildMultiColumnComputeNode(rowType, range, indexKeys, indexKeysOrder, prunedRange, resultIndexRange, settings, usedPrefixLen, ctx); + auto result = DoBuildMultiColumnComputeNode(rowType, range, indexKeys, indexKeysOrder, prunedRange, resultIndexRange, settings, usedPrefixLen, ctx, resultHint); + NormalizeRangeHint(resultHint, indexKeys, rowType, ctx, types); pointPrefixLen = resultIndexRange.PointPrefixLen; YQL_ENSURE(pointPrefixLen <= usedPrefixLen); YQL_ENSURE(prunedRange); @@ -1505,6 +1834,62 @@ TExprNode::TPtr BuildMultiColumnComputeNode(const TStructExprType& rowType, cons return result; } +NYql::NNodes::TExprBase UnpackRangePoints(NYql::NNodes::TExprBase node, TConstArrayRef<TString> keyColumns, NYql::TExprContext& expCtx, NYql::TPositionHandle pos) { + TCoArgument rangeArg = Build<TCoArgument>(expCtx, pos) + .Name("rangeArg") + .Done(); + + TVector<TExprBase> structMembers; + structMembers.reserve(keyColumns.size()); + for (size_t i = 0; i < keyColumns.size(); ++i) { + auto kth = [&] (size_t k) { + return Build<TCoUnwrap>(expCtx, pos) + .Optional<TCoNth>() + .Tuple<TCoNth>() + .Tuple(rangeArg) + .Index().Build(k) + .Build() + .Index().Build(i) + .Build() + .Done(); + }; + + auto first = kth(0); + auto second = kth(1); + + auto member = Build<TCoNameValueTuple>(expCtx, pos) + .Name().Build(keyColumns[i]) + .Value<TCoEnsure>() + .Value(first) + .Message<TCoString>().Literal().Build("invalid range bounds").Build() + .Predicate<TCoOr>() + .Add<TCoCmpEqual>() + .Left(first) + .Right(second) + .Build() + .Add<TCoAnd>() + .Add<TCoNot>().Value<TCoExists>().Optional(first).Build().Build() + .Add<TCoNot>().Value<TCoExists>().Optional(second).Build().Build() + .Build() + .Build() + .Build() + .Done(); + + structMembers.push_back(member); + } + + + return Build<TCoMap>(expCtx, pos) + .Input(node) + .Lambda() + .Args({rangeArg}) + .Body<TCoAsStruct>() + .Add(structMembers) + .Build() + .Build() + .Done(); +} + } // namespace @@ -1570,7 +1955,7 @@ bool TPredicateRangeExtractor::Prepare(const TExprNode::TPtr& filterLambdaNode, } TPredicateRangeExtractor::TBuildResult TPredicateRangeExtractor::BuildComputeNode(const TVector<TString>& indexKeys, - TExprContext& ctx) const + TExprContext& ctx, TTypeAnnotationContext& typesCtx) const { YQL_ENSURE(FilterLambda && Range && RowType, "Prepare() is not called"); @@ -1608,11 +1993,11 @@ TPredicateRangeExtractor::TBuildResult TPredicateRangeExtractor::BuildComputeNod TExprNode::TPtr rebuiltRange = RebuildRangeForIndexKeys(*RowType, Range, indexKeysOrder, result.UsedPrefixLen, ctx); TExprNode::TPtr prunedRange; result.ComputeNode = BuildMultiColumnComputeNode(*RowType, rebuiltRange, effectiveIndexKeys, indexKeysOrder, - prunedRange, Settings, result.UsedPrefixLen, result.PointPrefixLen, ctx); + prunedRange, Settings, result.UsedPrefixLen, result.PointPrefixLen, ctx, typesCtx, result.LiteralRange); + if (result.ComputeNode) { result.ExpectedMaxRanges = CalcMaxRanges(rebuiltRange, indexKeysOrder); if (result.ExpectedMaxRanges && *result.ExpectedMaxRanges < Settings.MaxRanges) { - // rebuild filter lambda with prunedRange predicate TCoLambda lambda(result.PrunedLambda); auto newPred = MakePredicateFromPrunedRange(prunedRange, lambda.Args().Arg(0).Ptr(), ctx); @@ -1635,4 +2020,9 @@ IPredicateRangeExtractor::TPtr MakePredicateRangeExtractor(const TPredicateExtra return MakeHolder<NDetail::TPredicateRangeExtractor>(settings); } + +TExprNode::TPtr BuildPointsList(const IPredicateRangeExtractor::TBuildResult& result, TConstArrayRef<TString> keyColumns, NYql::TExprContext& expCtx) { + return NDetail::UnpackRangePoints(NNodes::TExprBase(result.ComputeNode), keyColumns, expCtx, result.ComputeNode->Pos()).Ptr(); +} + } // namespace NYql diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h index 8e2d693f722..68524c25d62 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h @@ -17,7 +17,7 @@ public: return Range; } - TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx) const override final; + TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx, TTypeAnnotationContext& typesCtx) const override final; private: const TPredicateExtractorSettings Settings; TExprNode::TPtr FilterLambda; diff --git a/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan b/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan index 830f2d7cf40..c6b7cadafb9 100644 --- a/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan +++ b/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan @@ -42,7 +42,7 @@ "b", "d" ], - "ReadRangesExpectedSize": "1", + "ReadRangesExpectedSize": 1, "ReadRangesKeys": [ "a" ], |