aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ssmike <ssmike@ydb.tech> 2023-07-11 22:49:42 +0300
committer ssmike <ssmike@ydb.tech> 2023-07-11 22:49:42 +0300
commit f7576173ed3e091de24c8e38f3412523005f9b3e (patch)
tree 74e7158ad8aa0af99013b1d5aeae9894e4c31619
parent 4ea04c90b14d468bcdae5f98f2e1692f65285129 (diff)
download ydb-f7576173ed3e091de24c8e38f3412523005f9b3e.tar.gz
Prepare to remove fallback on kqp pushdown optimizer
-rw-r--r-- ydb/core/kqp/common/kqp_yql.cpp | 21
-rw-r--r-- ydb/core/kqp/common/kqp_yql.h | 10
-rw-r--r-- ydb/core/kqp/compile_service/kqp_compile_actor.cpp | 1
-rw-r--r-- ydb/core/kqp/compile_service/kqp_compile_service.cpp | 2
-rw-r--r-- ydb/core/kqp/expr_nodes/kqp_expr_nodes.json | 8
-rw-r--r-- ydb/core/kqp/host/kqp_type_ann.cpp | 3
-rw-r--r-- ydb/core/kqp/opt/kqp_query_plan.cpp | 6
-rw-r--r-- ydb/core/kqp/opt/logical/kqp_opt_log.cpp | 24
-rw-r--r-- ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp | 177
-rw-r--r-- ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp | 194
-rw-r--r-- ydb/core/kqp/provider/yql_kikimr_settings.h | 1
-rw-r--r-- ydb/core/protos/config.proto | 1
-rw-r--r-- ydb/library/yql/core/extract_predicate/extract_predicate.h | 19
-rw-r--r-- ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp | 2
-rw-r--r-- ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp | 408
-rw-r--r-- ydb/library/yql/core/extract_predicate/extract_predicate_impl.h | 2
-rw-r--r-- ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan | 2
17 files changed, 762 insertions, 119 deletions
diff --git a/ydb/core/kqp/common/kqp_yql.cpp b/ydb/core/kqp/common/kqp_yql.cpp
index 32294b86b90..27443b43238 100644
--- a/ydb/core/kqp/common/kqp_yql.cpp
+++ b/ydb/core/kqp/common/kqp_yql.cpp
@@ -310,17 +310,25 @@ TCoNameValueTupleList TKqpReadTableExplainPrompt::BuildNode(TExprContext& ctx, T
.Done()
);
- if (!ExpectedMaxRanges.empty()) {
+ if (ExpectedMaxRanges) {
prompt.emplace_back(
Build<TCoNameValueTuple>(ctx, pos)
.Name()
.Build(ExpectedMaxRangesName)
.Value<TCoAtom>()
- .Build(ExpectedMaxRanges)
+ .Build(ToString(*ExpectedMaxRanges))
.Done()
);
}
+ prompt.emplace_back(
+ Build<TCoNameValueTuple>(ctx, pos)
+ .Name()
+ .Build(PointPrefixLenName)
+ .Value<TCoAtom>()
+ .Build(ToString(PointPrefixLen))
+ .Done());
+
return Build<TCoNameValueTupleList>(ctx, pos)
.Add(prompt)
.Done();
@@ -345,8 +353,13 @@ TKqpReadTableExplainPrompt TKqpReadTableExplainPrompt::Parse(const NNodes::TCoNa
}
if (name == TKqpReadTableExplainPrompt::ExpectedMaxRangesName) {
- prompt.ExpectedMaxRanges = TString(tuple.Value().template Cast<TCoAtom>());
- continue;
+ prompt.ExpectedMaxRanges = FromString<ui64>(TString(tuple.Value().template Cast<TCoAtom>()));
+ continue;
+ }
+
+ if (name == TKqpReadTableExplainPrompt::PointPrefixLenName) {
+ prompt.PointPrefixLen = FromString<ui64>(TString(tuple.Value().template Cast<TCoAtom>()));
+ continue;
}
YQL_ENSURE(false, "Unknown KqpReadTableRanges explain prompt name '" << name << "'");
diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h
index 293f98c90db..08636cbe1d6 100644
--- a/ydb/core/kqp/common/kqp_yql.h
+++ b/ydb/core/kqp/common/kqp_yql.h
@@ -85,16 +85,22 @@ struct TKqpUpsertRowsSettings {
struct TKqpReadTableExplainPrompt {
static constexpr TStringBuf UsedKeyColumnsName = "UsedKeyColumns";
static constexpr TStringBuf ExpectedMaxRangesName = "ExpectedMaxRanges";
+ static constexpr TStringBuf PointPrefixLenName = "PointPrefixLen";
TVector<TString> UsedKeyColumns;
- TString ExpectedMaxRanges;
+ TMaybe<ui64> ExpectedMaxRanges;
+ ui64 PointPrefixLen = 0;
void SetUsedKeyColumns(TVector<TString> columns) {
UsedKeyColumns = columns;
}
void SetExpectedMaxRanges(size_t count) {
- ExpectedMaxRanges = ToString(count);
+ ExpectedMaxRanges = count;
+ }
+
+ void SetPointPrefixLen(size_t len) {
+ PointPrefixLen = len;
}
NNodes::TCoNameValueTupleList BuildNode(TExprContext& ctx, TPositionHandle pos) const;
diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp
index 18b0f0b2ba9..2f3b744c0df 100644
--- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp
+++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp
@@ -407,6 +407,7 @@ void ApplyServiceConfig(TKikimrConfiguration& kqpConfig, const TTableServiceConf
kqpConfig.EnablePreparedDdl = serviceConfig.GetEnablePreparedDdl();
kqpConfig.EnableSequences = serviceConfig.GetEnableSequences();
kqpConfig.BindingsMode = RemapBindingsMode(serviceConfig.GetBindingsMode());
+ kqpConfig.PredicateExtract20 = serviceConfig.GetPredicateExtract20();
}
IActor* CreateKqpCompileActor(const TActorId& owner, const TKqpSettings::TConstPtr& kqpSettings,
diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp
index a81fe507e68..cc920e29952 100644
--- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp
+++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp
@@ -370,6 +370,7 @@ private:
bool enableKqpDataQueryPredicateExtract = Config.GetEnablePredicateExtractForDataQueries();
bool enableKqpScanQueryPredicateExtract = Config.GetEnablePredicateExtractForScanQueries();
+ bool predicateExtract20 = Config.GetPredicateExtract20();
bool enableSequentialReads = Config.GetEnableSequentialReads();
bool defaultSyntaxVersion = Config.GetSqlVersion();
@@ -389,6 +390,7 @@ private:
Config.GetEnableKqpScanQuerySourceRead() != enableKqpScanQuerySourceRead ||
Config.GetEnablePredicateExtractForDataQueries() != enableKqpDataQueryPredicateExtract ||
Config.GetEnablePredicateExtractForScanQueries() != enableKqpScanQueryPredicateExtract ||
+ Config.GetPredicateExtract20() != predicateExtract20 ||
Config.GetEnableSequentialReads() != enableSequentialReads ||
Config.GetEnableKqpImmediateEffects() != enableKqpImmediateEffects) {
diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json
index bb2b916b29e..622675b009f 100644
--- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json
+++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json
@@ -102,7 +102,10 @@
{
"Name": "TKqlReadTableRanges",
"Base": "TKqlReadTableRangesBase",
- "Match": {"Type": "Callable", "Name": "KqlReadTableRanges"}
+ "Match": {"Type": "Callable", "Name": "KqlReadTableRanges"},
+ "Children": [
+ {"Index": 5, "Name": "PrefixPointsExpr", "Type": "TExprBase", "Optional": true}
+ ]
},
{
"Name": "TKqpReadTableRanges",
@@ -127,7 +130,8 @@
"Base": "TKqlReadTableRangesBase",
"Match": {"Type": "Callable", "Name": "TKqlReadTableIndexRanges"},
"Children": [
- {"Index": 5, "Name": "Index", "Type": "TCoAtom"}
+ {"Index": 5, "Name": "Index", "Type": "TCoAtom"},
+ {"Index": 6, "Name": "PrefixPointsExpr", "Type": "TExprBase", "Optional": true}
]
},
{
diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp
index 9d34c2f4668..7211e6dbf32 100644
--- a/ydb/core/kqp/host/kqp_type_ann.cpp
+++ b/ydb/core/kqp/host/kqp_type_ann.cpp
@@ -303,7 +303,8 @@ TStatus AnnotateReadTableRanges(const TExprNode::TPtr& node, TExprContext& ctx,
size_t argCount = (olapTable || index) ? 6 : 5;
- if (!EnsureArgsCount(*node, argCount, ctx)) {
+ // Accept the mandatory arguments plus at most one optional trailing prefix-points expression; fail if either bound is violated.
+ if (!EnsureMinArgsCount(*node, argCount, ctx) || !EnsureMaxArgsCount(*node, argCount + 1, ctx)) {
return TStatus::Error;
}
diff --git a/ydb/core/kqp/opt/kqp_query_plan.cpp b/ydb/core/kqp/opt/kqp_query_plan.cpp
index 7183872618d..b29c09596a2 100644
--- a/ydb/core/kqp/opt/kqp_query_plan.cpp
+++ b/ydb/core/kqp/opt/kqp_query_plan.cpp
@@ -795,9 +795,11 @@ private:
}
if (explainPrompt.ExpectedMaxRanges) {
- op.Properties["ReadRangesExpectedSize"] = explainPrompt.ExpectedMaxRanges;
+ op.Properties["ReadRangesExpectedSize"] = ToString(*explainPrompt.ExpectedMaxRanges);
}
+ op.Properties["ReadRangesPointPrefixLen"] = ToString(explainPrompt.PointPrefixLen);
+
auto& columns = op.Properties["ReadColumns"];
for (const auto& col : sourceSettings.Columns()) {
readInfo.Columns.emplace_back(TString(col.Value()));
@@ -1254,7 +1256,7 @@ private:
}
if (explainPrompt.ExpectedMaxRanges) {
- op.Properties["ReadRangesExpectedSize"] = explainPrompt.ExpectedMaxRanges;
+ op.Properties["ReadRangesExpectedSize"] = ToString(*explainPrompt.ExpectedMaxRanges); // stringified, matching the other ReadRangesExpectedSize site in this file
}
auto& columns = op.Properties["ReadColumns"];
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
index 1e720360171..dafc1368488 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
@@ -28,6 +28,7 @@ public:
{
#define HNDL(name) "KqpLogical-"#name, Hndl(&TKqpLogicalOptTransformer::name)
AddHandler(0, &TCoFlatMap::Match, HNDL(PushPredicateToReadTable));
+ AddHandler(0, &TCoFlatMap::Match, HNDL(PushExtractedPredicateToReadTable));
AddHandler(0, &TCoAggregate::Match, HNDL(RewriteAggregate));
AddHandler(0, &TCoTake::Match, HNDL(RewriteTakeSortToTopSort));
AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInToEquiJoin));
@@ -36,7 +37,7 @@ public:
AddHandler(0, &TDqJoin::Match, HNDL(JoinToIndexLookup));
AddHandler(0, &TCoCalcOverWindowBase::Match, HNDL(ExpandWindowFunctions));
AddHandler(0, &TCoCalcOverWindowGroup::Match, HNDL(ExpandWindowFunctions));
- AddHandler(0, &TCoFlatMap::Match, HNDL(PushExtractedPredicateToReadTable));
+ AddHandler(0, &TCoFlatMap::Match, HNDL(LatePushExtractedPredicateToReadTable));
AddHandler(0, &TCoTop::Match, HNDL(RewriteTopSortOverIndexRead));
AddHandler(0, &TCoTopSort::Match, HNDL(RewriteTopSortOverIndexRead));
AddHandler(0, &TCoTake::Match, HNDL(RewriteTakeOverIndexRead));
@@ -69,19 +70,34 @@ public:
}
protected:
+ TMaybeNode<TExprBase> PushPredicateToReadTable(TExprBase node, TExprContext& ctx) {
+ if (!KqpCtx.Config->PredicateExtract20) { // this rule only rewrites while PredicateExtract20 is off
+ TExprBase rewritten = KqpPushPredicateToReadTable(node, ctx, KqpCtx);
+ DumpAppliedRule("PushPredicateToReadTable", node.Ptr(), rewritten.Ptr(), ctx);
+ return rewritten;
+ }
+ return node; // PredicateExtract20 is on: hand the node back unchanged
+ }
TMaybeNode<TExprBase> PushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx) {
+ if (!KqpCtx.Config->PredicateExtract20) {
+ return node;
+ }
TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx);
DumpAppliedRule("PushExtractedPredicateToReadTable", node.Ptr(), output.Ptr(), ctx);
return output;
}
- TMaybeNode<TExprBase> PushPredicateToReadTable(TExprBase node, TExprContext& ctx) {
- TExprBase output = KqpPushPredicateToReadTable(node, ctx, KqpCtx);
- DumpAppliedRule("PushPredicateToReadTable", node.Ptr(), output.Ptr(), ctx);
+ TMaybeNode<TExprBase> LatePushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx) {
+ if (KqpCtx.Config->PredicateExtract20) {
+ return node;
+ }
+ TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx);
+ DumpAppliedRule("PushExtractedPredicateToReadTable", node.Ptr(), output.Ptr(), ctx);
return output;
}
+
TMaybeNode<TExprBase> RewriteAggregate(TExprBase node, TExprContext& ctx) {
TExprBase output = DqRewriteAggregate(node, ctx, TypesCtx, false, KqpCtx.Config->HasOptEnableOlapPushdown(), KqpCtx.Config->HasOptUseFinalizeByKey());
DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Ptr(), ctx);
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp
index f3619293894..38de2ebb3e9 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp
@@ -187,79 +187,81 @@ TMaybeNode<TKqlKeyInc> GetRightTableKeyPrefix(const TKqlKeyRange& range) {
return rangeFrom;
}
-TExprBase BuildLookupIndex(TExprContext& ctx, const TPositionHandle pos, const TKqlReadTableBase& read,
+TExprBase BuildLookupIndex(TExprContext& ctx, const TPositionHandle pos,
+ const TKqpTable& table, const TCoAtomList& columns,
const TExprBase& keysToLookup, const TVector<TCoAtom>& lookupNames, const TString& indexName,
const TKqpOptimizeContext& kqpCtx)
{
if (kqpCtx.IsScanQuery()) {
YQL_ENSURE(kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin, "Stream lookup is not enabled for index lookup join");
return Build<TKqlStreamLookupIndex>(ctx, pos)
- .Table(read.Table())
+ .Table(table)
.LookupKeys<TCoSkipNullMembers>()
.Input(keysToLookup)
.Members()
.Add(lookupNames)
.Build()
.Build()
- .Columns(read.Columns())
+ .Columns(columns)
.Index()
.Build(indexName)
.Done();
}
return Build<TKqlLookupIndex>(ctx, pos)
- .Table(read.Table())
+ .Table(table)
.LookupKeys<TCoSkipNullMembers>()
.Input(keysToLookup)
.Members()
.Add(lookupNames)
.Build()
.Build()
- .Columns(read.Columns())
+ .Columns(columns)
.Index()
.Build(indexName)
.Done();
}
-TExprBase BuildLookupTable(TExprContext& ctx, const TPositionHandle pos, const TKqlReadTableBase& read,
+TExprBase BuildLookupTable(TExprContext& ctx, const TPositionHandle pos,
+ const TKqpTable& table, const TCoAtomList& columns,
const TExprBase& keysToLookup, const TVector<TCoAtom>& lookupNames, const TKqpOptimizeContext& kqpCtx)
{
if (kqpCtx.IsScanQuery()) {
YQL_ENSURE(kqpCtx.Config->EnableKqpScanQueryStreamIdxLookupJoin, "Stream lookup is not enabled for index lookup join");
return Build<TKqlStreamLookupTable>(ctx, pos)
- .Table(read.Table())
+ .Table(table)
.LookupKeys<TCoSkipNullMembers>()
.Input(keysToLookup)
.Members()
.Add(lookupNames)
.Build()
.Build()
- .Columns(read.Columns())
+ .Columns(columns)
.Done();
}
if (kqpCtx.Config->EnableKqpDataQueryStreamLookup) {
return Build<TKqlStreamLookupTable>(ctx, pos)
- .Table(read.Table())
+ .Table(table)
.LookupKeys<TCoSkipNullMembers>()
.Input(keysToLookup)
.Members()
.Add(lookupNames)
.Build()
.Build()
- .Columns(read.Columns())
+ .Columns(columns)
.Done();
}
return Build<TKqlLookupTable>(ctx, pos)
- .Table(read.Table())
+ .Table(table)
.LookupKeys<TCoSkipNullMembers>()
.Input(keysToLookup)
.Members()
.Add(lookupNames)
.Build()
.Build()
- .Columns(read.Columns())
+ .Columns(columns)
.Done();
}
@@ -336,7 +338,10 @@ bool IsParameterToListOfStructsRepack(const TExprBase& expr) {
//#define DBG(...) YQL_CLOG(DEBUG, ProviderKqp) << __VA_ARGS__
#define DBG(...)
+template<typename ReadType>
TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) {
+ static_assert(std::is_same_v<ReadType, TKqlReadTableBase> || std::is_same_v<ReadType, TKqlReadTableRangesBase>, "unsupported read type");
+
if (!join.RightLabel().Maybe<TCoAtom>()) {
// Lookup only in tables
return {};
@@ -350,64 +355,97 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
TString lookupTable;
TString indexName;
- auto rightReadMatch = MatchRead<TKqlReadTableBase>(join.RightInput());
- if (!rightReadMatch) {
- if (auto readRangesMatch = MatchRead<TKqlReadTableRangesBase>(join.RightInput())) {
- // for now only fullscans are supported
- auto read = readRangesMatch->Read.Cast<TKqlReadTableRangesBase>();
- if (TCoVoid::Match(read.Ranges().Raw())) {
- rightReadMatch = readRangesMatch;
- rightReadMatch->Read =
- Build<TKqlReadTable>(ctx, join.Pos())
- .Settings(read.Settings())
- .Table(read.Table())
- .Columns(read.Columns())
- .Range<TKqlKeyRange>()
- .From<TKqlKeyInc>().Build()
- .To<TKqlKeyInc>().Build()
- .Build()
- .Done();
- if (auto indexRead = read.Maybe<TKqlReadTableIndexRanges>()) {
- const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path());
- const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue());
- lookupTable = indexMeta->Name;
- indexName = indexRead.Cast().Index().StringValue();
- }
- } else {
- return {};
- }
- } else {
+ auto rightReadMatch = MatchRead<ReadType>(join.RightInput());
+ if (!rightReadMatch || rightReadMatch->FlatMap && !IsPassthroughFlatMap(rightReadMatch->FlatMap.Cast(), nullptr)) {
+ return {};
+ }
+
+ auto rightRead = rightReadMatch->Read.template Cast<ReadType>();
+
+ TMaybeNode<TCoAtomList> lookupColumns;
+ TMaybe<TKqlKeyInc> rightTableKeyPrefix;
+ if constexpr (std::is_same_v<ReadType, TKqlReadTableBase>) {
+ Y_ENSURE(rightRead.template Maybe<TKqlReadTable>() || rightRead.template Maybe<TKqlReadTableIndex>());
+ const TKqlReadTableBase read = rightRead;
+ if (!read.Table().SysView().Value().empty()) {
+ // Can't lookup in system views
return {};
}
- }
- if (rightReadMatch->FlatMap && !IsPassthroughFlatMap(rightReadMatch->FlatMap.Cast(), nullptr)) {
- return {};
- }
+ auto maybeRightTableKeyPrefix = GetRightTableKeyPrefix(read.Range());
+ if (!maybeRightTableKeyPrefix) {
+ return {};
+ }
- auto rightRead = rightReadMatch->Read.Cast<TKqlReadTableBase>();
+ lookupColumns = read.Columns();
+ rightTableKeyPrefix = maybeRightTableKeyPrefix.Cast();
- Y_ENSURE(rightRead.Maybe<TKqlReadTable>() || rightRead.Maybe<TKqlReadTableIndex>());
+ if (auto indexRead = rightRead.template Maybe<TKqlReadTableIndex>()) {
+ indexName = indexRead.Cast().Index().StringValue();
+ lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name;
+ } else {
+ lookupTable = read.Table().Path().StringValue();
+ }
+ } else if constexpr (std::is_same_v<ReadType, TKqlReadTableRangesBase>){
+ auto read = rightReadMatch->Read.template Cast<TKqlReadTableRangesBase>();
+ lookupColumns = read.Columns();
+
+ if (auto indexRead = read.template Maybe<TKqlReadTableIndexRanges>()) {
+ const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path());
+ const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue());
+ lookupTable = indexMeta->Name;
+ indexName = indexRead.Cast().Index().StringValue();
+ } else {
+ lookupTable = read.Table().Path().StringValue();
+ }
- const TKqlReadTableBase read = rightRead;
- if (!read.Table().SysView().Value().empty()) {
- // Can't lookup in system views
- return {};
- }
+ const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
- auto maybeRightTableKeyPrefix = GetRightTableKeyPrefix(read.Range());
- if (!maybeRightTableKeyPrefix) {
- return {};
- }
- auto rightTableKeyPrefix = maybeRightTableKeyPrefix.Cast();
+ if (TCoVoid::Match(read.Ranges().Raw())) {
+ rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos()).Done();
+ } else {
+ auto prompt = TKqpReadTableExplainPrompt::Parse(read);
+ if (prompt.ExpectedMaxRanges != TMaybe<ui64>(1)) {
+ return {};
+ }
+
+ TMaybeNode<TExprBase> row;
+ if (read.template Maybe<TKqlReadTableRanges>()) {
+ row = read.template Cast<TKqlReadTableRanges>().PrefixPointsExpr();
+ }
+ if (rightRead.template Maybe<TKqlReadTableIndexRanges>()) {
+ row = read.template Cast<TKqlReadTableIndexRanges>().PrefixPointsExpr();
+ }
+ if (!row.IsValid()) {
+ return {};
+ }
+ row = Build<TCoHead>(ctx, read.Ranges().Pos()).Input(row.Cast()).Done();
- if (auto indexRead = rightRead.Maybe<TKqlReadTableIndex>()) {
- indexName = indexRead.Cast().Index().StringValue();
- lookupTable = GetIndexMetadata(indexRead.Cast(), *kqpCtx.Tables, kqpCtx.Cluster)->Name;
- } else if (!indexName) {
- lookupTable = read.Table().Path().StringValue();
+ size_t prefixLen = prompt.PointPrefixLen;
+ TVector<TString> keyColumns;
+ for (size_t i = 0; i < prefixLen; ++i) {
+ YQL_ENSURE(i < rightTableDesc.Metadata->KeyColumnNames.size());
+ keyColumns.push_back(rightTableDesc.Metadata->KeyColumnNames[i]);
+ }
+
+ TVector<TExprBase> components;
+ for (auto column : keyColumns) {
+ TCoAtom columnAtom(ctx.NewAtom(read.Ranges().Pos(), column));
+ components.push_back(
+ Build<TCoMember>(ctx, read.Ranges().Pos())
+ .Struct(row.Cast())
+ .Name(columnAtom)
+ .Done());
+ }
+
+ rightTableKeyPrefix = Build<TKqlKeyInc>(ctx, read.Ranges().Pos())
+ .Add(components)
+ .Done();
+ }
}
+ Y_ENSURE(rightTableKeyPrefix);
+
const auto& rightTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, lookupTable);
TMap<std::string_view, TString> rightJoinKeyToLeft;
@@ -446,12 +484,12 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
auto leftColumn = rightJoinKeyToLeft.FindPtr(rightColumnName);
- if (fixedPrefix < rightTableKeyPrefix.ArgCount()) {
+ if (fixedPrefix < rightTableKeyPrefix->ArgCount()) {
if (leftColumn) {
return {};
}
- member = rightTableKeyPrefix.Arg(fixedPrefix).Ptr();
+ member = rightTableKeyPrefix->Arg(fixedPrefix).Ptr();
fixedPrefix++;
} else {
if (!leftColumn) {
@@ -562,8 +600,8 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
.Done();
TExprBase lookup = indexName
- ? BuildLookupIndex(ctx, join.Pos(), read, keysToLookup, lookupNames, indexName, kqpCtx)
- : BuildLookupTable(ctx, join.Pos(), read, keysToLookup, lookupNames, kqpCtx);
+ ? BuildLookupIndex(ctx, join.Pos(), rightRead.Table(), rightRead.Columns(), keysToLookup, lookupNames, indexName, kqpCtx)
+ : BuildLookupTable(ctx, join.Pos(), rightRead.Table(), rightRead.Columns(), keysToLookup, lookupNames, kqpCtx);
// Skip null keys in lookup part as for equijoin semantics null != null,
// so we can't have nulls in lookup part
@@ -574,7 +612,6 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
.Build()
.Done();
- auto lookupColumns = read.Columns();
if (rightReadMatch->ExtractMembers) {
lookupColumns = rightReadMatch->ExtractMembers.Cast().Members();
}
@@ -585,7 +622,7 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext
auto arg = TCoArgument(ctx.NewArgument(join.Pos(), "row"));
auto rightLabel = join.RightLabel().Cast<TCoAtom>().Value();
- TVector<TExprBase> renames = CreateRenames(rightReadMatch->FlatMap, lookupColumns, arg, rightLabel,
+ TVector<TExprBase> renames = CreateRenames(rightReadMatch->FlatMap, lookupColumns.Cast(), arg, rightLabel,
join.Pos(), ctx);
lookup = Build<TCoMap>(ctx, join.Pos())
@@ -628,12 +665,16 @@ TExprBase KqpJoinToIndexLookup(const TExprBase& node, TExprContext& ctx, const T
auto flipJoin = FlipLeftSemiJoin(join, ctx);
DBG("-- Flip join");
- if (auto indexLookupJoin = KqpJoinToIndexLookupImpl(flipJoin, ctx, kqpCtx)) {
+ if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableBase>(flipJoin, ctx, kqpCtx)) {
+ return indexLookupJoin.Cast();
+ } else if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableRangesBase>(flipJoin, ctx, kqpCtx)) {
return indexLookupJoin.Cast();
}
}
- if (auto indexLookupJoin = KqpJoinToIndexLookupImpl(join, ctx, kqpCtx)) {
+ if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableBase>(join, ctx, kqpCtx)) {
+ return indexLookupJoin.Cast();
+ } else if (auto indexLookupJoin = KqpJoinToIndexLookupImpl<TKqlReadTableRangesBase>(join, ctx, kqpCtx)) {
return indexLookupJoin.Cast();
}
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp
index fc9cc2c70f6..1d7a1f8a2b4 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp
@@ -10,6 +10,7 @@
#include <ydb/library/yql/providers/common/provider/yql_table_lookup.h>
#include <ydb/library/yql/core/extract_predicate/extract_predicate.h>
+
namespace NKikimr::NKqp::NOpt {
using namespace NYql;
@@ -150,6 +151,19 @@ TMaybeNode<TExprBase> TryBuildTrivialReadTable(TCoFlatMap& flatmap, TKqlReadTabl
.Done();
}
+// Child count (at least 1) of a collection node after unwrapping a leading Just; empty unless it is tuple-typed or AsList/AsDict/AsSet.
+TMaybe<size_t> EstimateSqlInCollectionSize(const NYql::TExprNode::TPtr& collection) {
+ const NYql::TExprNode::TPtr unwrapped = collection->IsCallable("Just") ? collection->HeadPtr() : collection;
+
+ const bool isTuple = unwrapped->GetTypeAnn()->GetKind() == NYql::ETypeAnnotationKind::Tuple;
+ if (!isTuple && !unwrapped->IsCallable({"AsList", "AsDict", "AsSet"})) {
+ return {};
+ }
+
+ // A node without children is still reported as size one.
+ return std::max<size_t>(unwrapped->ChildrenSize(), 1);
+}
+
} // namespace
TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx,
@@ -207,28 +221,62 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx
const auto& mainTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path());
auto& tableDesc = indexName ? kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(indexName.Cast())).first->Name) : mainTableDesc;
- // test for trivial cases (explicit literals or parameters)
- if (auto expr = TryBuildTrivialReadTable(flatmap, read, *readMatch, tableDesc, ctx, kqpCtx, indexName)) {
- return expr.Cast();
- }
-
THashSet<TString> possibleKeys;
TPredicateExtractorSettings settings;
settings.MergeAdjacentPointRanges = true;
settings.HaveNextValueCallable = true;
+ settings.BuildLiteralRange = true;
+
+ if (!kqpCtx.Config->PredicateExtract20) {
+ // test for trivial cases (explicit literals or parameters)
+ if (auto expr = TryBuildTrivialReadTable(flatmap, read, *readMatch, tableDesc, ctx, kqpCtx, indexName)) {
+ return expr.Cast();
+ }
+ } else {
+ settings.IsValidForRange = [&] (const TExprNode::TPtr& node) -> bool {
+ TExprBase expr(node);
+ if (auto sqlin = expr.Maybe<TCoSqlIn>()) {
+ if (!EstimateSqlInCollectionSize(sqlin.Cast().Collection().Ptr())) {
+ return false;
+ }
+ }
+
+ return true;
+ };
+ }
+
auto extractor = MakePredicateRangeExtractor(settings);
YQL_ENSURE(tableDesc.SchemeNode);
bool prepareSuccess = extractor->Prepare(flatmap.Lambda().Ptr(), *mainTableDesc.SchemeNode, possibleKeys, ctx, typesCtx);
YQL_ENSURE(prepareSuccess);
- auto buildResult = extractor->BuildComputeNode(tableDesc.Metadata->KeyColumnNames, ctx);
+ auto buildResult = extractor->BuildComputeNode(tableDesc.Metadata->KeyColumnNames, ctx, typesCtx);
+
TExprNode::TPtr ranges = buildResult.ComputeNode;
if (!ranges) {
return node;
}
+ TExprNode::TPtr prefixPointsExpr;
+ IPredicateRangeExtractor::TBuildResult pointsExtractionResult;
+
+ if (buildResult.PointPrefixLen > 0 && buildResult.ExpectedMaxRanges) {
+ TPredicateExtractorSettings pointSettings = settings;
+ pointSettings.MergeAdjacentPointRanges = false;
+ pointSettings.HaveNextValueCallable = false;
+ TVector<TString> pointKeys;
+ for (size_t i = 0; i < buildResult.PointPrefixLen; ++i) {
+ pointKeys.push_back(tableDesc.Metadata->KeyColumnNames[i]);
+ }
+ auto extractor = MakePredicateRangeExtractor(pointSettings);
+ YQL_ENSURE(extractor->Prepare(flatmap.Lambda().Ptr(), *mainTableDesc.SchemeNode, possibleKeys, ctx, typesCtx));
+
+ pointsExtractionResult = extractor->BuildComputeNode(pointKeys, ctx, typesCtx);
+ prefixPointsExpr = BuildPointsList(pointsExtractionResult, pointKeys, ctx);
+ }
+
TExprNode::TPtr residualLambda = buildResult.PrunedLambda;
TVector<TString> usedColumns;
@@ -248,23 +296,123 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx
YQL_CLOG(DEBUG, ProviderKqp) << "Residual lambda: " << KqpExprToPrettyString(*residualLambda, ctx);
TMaybe<TExprBase> input;
- if (indexName) {
- input = Build<TKqlReadTableIndexRanges>(ctx, read.Pos())
- .Table(read.Table())
- .Ranges(ranges)
- .Columns(read.Columns())
- .Settings(read.Settings())
- .ExplainPrompt(prompt.BuildNode(ctx, read.Pos()))
- .Index(indexName.Cast())
- .Done();
- } else {
- input = Build<TKqlReadTableRanges>(ctx, read.Pos())
- .Table(read.Table())
- .Ranges(ranges)
- .Columns(read.Columns())
- .Settings(read.Settings())
- .ExplainPrompt(prompt.BuildNode(ctx, read.Pos()))
- .Done();
+ if (kqpCtx.Config->PredicateExtract20 &&
+ (tableDesc.Metadata->Kind == EKikimrTableKind::Datashard ||
+ tableDesc.Metadata->Kind == EKikimrTableKind::SysView))
+ {
+ // Fills `result` with a lookup over `keys`; leaves it untouched when neither the data-query nor the enabled scan stream-lookup path applies.
+ auto buildLookup = [&] (TExprNode::TPtr keys, TMaybe<TExprBase>& result) {
+ if (indexName) {
+ if (kqpCtx.IsDataQuery()) {
+ result = Build<TKqlLookupIndex>(ctx, node.Pos())
+ .Table(read.Table())
+ .Columns(read.Columns())
+ .LookupKeys(keys)
+ .Index(indexName.Cast())
+ .Done();
+ } else if (kqpCtx.IsScanQuery() && kqpCtx.Config->EnableKqpScanQueryStreamLookup) {
+ result = Build<TKqlStreamLookupIndex>(ctx, node.Pos())
+ .Table(read.Table())
+ .Columns(read.Columns())
+ .LookupKeys(keys)
+ .Index(indexName.Cast())
+ .Done();
+ }
+ } else if (kqpCtx.IsDataQuery()) {
+ result = Build<TKqlLookupTable>(ctx, node.Pos())
+ .Table(read.Table())
+ .Columns(read.Columns())
+ .LookupKeys(keys)
+ .Done();
+ } else if (kqpCtx.IsScanQuery() && kqpCtx.Config->EnableKqpScanQueryStreamLookup) {
+ result = Build<TKqlStreamLookupTable>(ctx, node.Pos())
+ .Table(read.Table())
+ .Columns(read.Columns())
+ .LookupKeys(keys)
+ .Done();
+ }
+ };
+
+ if (buildResult.LiteralRange) {
+ bool ispoint = buildResult.PointPrefixLen == tableDesc.Metadata->KeyColumnNames.size();
+ if (ispoint) {
+ TVector<TExprBase> structMembers;
+ for (size_t i = 0; i < tableDesc.Metadata->KeyColumnNames.size(); ++i) {
+ auto member = Build<TCoNameValueTuple>(ctx, node.Pos())
+ .Name().Build(tableDesc.Metadata->KeyColumnNames[i])
+ .Value(buildResult.LiteralRange->Left.Columns[i])
+ .Done();
+
+ structMembers.push_back(member);
+ }
+ TExprBase keys = Build<TCoAsList>(ctx, node.Pos())
+ .Add<TCoAsStruct>()
+ .Add(structMembers)
+ .Build()
+ .Done();
+
+ buildLookup(keys.Ptr(), input);
+ } else {
+ auto fromExpr = buildResult.LiteralRange->Left.Inclusive
+ ? Build<TKqlKeyInc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Left.Columns).Done().Cast<TKqlKeyTuple>()
+ : Build<TKqlKeyExc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Left.Columns).Done().Cast<TKqlKeyTuple>();
+
+ auto toExpr = buildResult.LiteralRange->Right.Inclusive
+ ? Build<TKqlKeyInc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Right.Columns).Done().Cast<TKqlKeyTuple>()
+ : Build<TKqlKeyExc>(ctx, read.Pos()).Add(buildResult.LiteralRange->Right.Columns).Done().Cast<TKqlKeyTuple>();
+
+ auto keyRange = Build<TKqlKeyRange>(ctx, read.Pos())
+ .From(fromExpr)
+ .To(toExpr)
+ .Done();
+
+ if (indexName) {
+ input = Build<TKqlReadTableIndex>(ctx, read.Pos())
+ .Table(read.Table())
+ .Columns(read.Columns())
+ .Settings(read.Settings())
+ .Range(keyRange)
+ .Index(indexName.Cast())
+ .Done();
+ } else {
+ input = Build<TKqlReadTable>(ctx, read.Pos())
+ .Table(read.Table())
+ .Columns(read.Columns())
+ .Settings(read.Settings())
+ .Range(keyRange)
+ .Done();
+ }
+ }
+ } else if (buildResult.PointPrefixLen == tableDesc.Metadata->KeyColumnNames.size()) {
+ YQL_ENSURE(prefixPointsExpr);
+ residualLambda = pointsExtractionResult.PrunedLambda;
+ buildLookup(prefixPointsExpr, input);
+ }
+ }
+
+ if (!input) {
+ TMaybeNode<TExprBase> prefix = prefixPointsExpr;
+
+ if (indexName) {
+ input = Build<TKqlReadTableIndexRanges>(ctx, read.Pos())
+ .Table(read.Table())
+ .Ranges(ranges)
+ .Columns(read.Columns())
+ .Settings(read.Settings())
+ .ExplainPrompt(prompt.BuildNode(ctx, read.Pos()))
+ .Index(indexName.Cast())
+ .PrefixPointsExpr(prefix)
+ .Done();
+ } else {
+ input = Build<TKqlReadTableRanges>(ctx, read.Pos())
+ .Table(read.Table())
+ .Ranges(ranges)
+ .Columns(read.Columns())
+ .Settings(read.Settings())
+ .ExplainPrompt(prompt.BuildNode(ctx, read.Pos()))
+ .PrefixPointsExpr(prefix)
+ .Done();
+ }
}
*input = readMatch->BuildProcessNodes(*input, ctx);
diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h
index 91f1135a075..aabf39f8170 100644
--- a/ydb/core/kqp/provider/yql_kikimr_settings.h
+++ b/ydb/core/kqp/provider/yql_kikimr_settings.h
@@ -141,6 +141,7 @@ struct TKikimrConfiguration : public TKikimrSettings, public NCommon::TSettingDi
bool EnableKqpScanQueryStreamIdxLookupJoin = false;
bool EnablePredicateExtractForScanQuery = true;
bool EnablePredicateExtractForDataQuery = false;
+ bool PredicateExtract20 = false;
bool EnableKqpImmediateEffects = false;
bool EnableSequentialReads = false;
bool EnablePreparedDdl = false;
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index 2b952a6b630..8c10d52ef8f 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -1320,6 +1320,7 @@ message TTableServiceConfig {
optional EBindingsMode BindingsMode = 40 [default = BM_ENABLED];
optional TIteratorReadsRetrySettings IteratorReadsRetrySettings = 41;
+ optional bool PredicateExtract20 = 44 [default = false];
};
// Config describes immediate controls and allows
diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate.h b/ydb/library/yql/core/extract_predicate/extract_predicate.h
index e4509474239..83fc16110d3 100644
--- a/ydb/library/yql/core/extract_predicate/extract_predicate.h
+++ b/ydb/library/yql/core/extract_predicate/extract_predicate.h
@@ -9,6 +9,8 @@ struct TPredicateExtractorSettings {
size_t MaxRanges = 10000;
bool MergeAdjacentPointRanges = true;
bool HaveNextValueCallable = false;
+ bool BuildLiteralRange = false;
+ std::function<bool(const NYql::TExprNode::TPtr&)> IsValidForRange;
};
class IPredicateRangeExtractor {
@@ -24,13 +26,28 @@ public:
size_t UsedPrefixLen = 0;
size_t PointPrefixLen = 0;
TMaybe<size_t> ExpectedMaxRanges;
+
+ // Statically known key range produced alongside the compute node.
+ // Each side is described by the expressions bounding the leading key columns.
+ struct TLiteralRange {
+ // One side of the range.
+ struct TLiteralRangeBound {
+ // Whether the bound itself belongs to the range.
+ bool Inclusive = false;
+ // Bound expressions, one per key column, in key order. May be shorter than
+ // the key; the implementation treats an exhausted column list as +-inf.
+ TVector<TExprNode::TPtr> Columns;
+ };
+
+ // Lower bound of the range.
+ TLiteralRangeBound Left;
+ // Upper bound of the range.
+ TLiteralRangeBound Right;
+ };
+
+ TMaybe<TLiteralRange> LiteralRange;
};
- virtual TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx) const = 0;
+ virtual TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx, TTypeAnnotationContext& typesCtx) const = 0;
virtual ~IPredicateRangeExtractor() = default;
};
IPredicateRangeExtractor::TPtr MakePredicateRangeExtractor(const TPredicateExtractorSettings& settings = {});
+
+// Wraps result.ComputeNode into an expression that maps every computed range to a struct
+// with one member per entry of keyColumns (see UnpackRangePoints in extract_predicate_impl.cpp).
+// The built expression ensures at runtime that both boundaries of a range agree ("invalid range bounds").
+TExprNode::TPtr BuildPointsList(const IPredicateRangeExtractor::TBuildResult&, TConstArrayRef<TString> keyColumns, NYql::TExprContext& expCtx);
+
}
diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp
index 67512884ec0..67726a54bbb 100644
--- a/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp
+++ b/ydb/library/yql/core/extract_predicate/extract_predicate_dbg.cpp
@@ -43,7 +43,7 @@ TExprNode::TPtr ExpandRangeComputeFor(const TExprNode::TPtr& node, TExprContext&
}
- auto buildResult = extractor->BuildComputeNode(indexKeys, ctx);
+ auto buildResult = extractor->BuildComputeNode(indexKeys, ctx, *typesCtx);
if (!buildResult.ComputeNode) {
YQL_CLOG(DEBUG, Core) << "BuildComputeNode: ranges can not be built for predicate";
return result;
diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
index d5c0c5a2915..40ef3047238 100644
--- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
+++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
@@ -233,6 +233,9 @@ const THashMap<TStringBuf, TStringBuf> SupportedBinOps = {
};
bool IsValidForRange(TExprNode::TPtr& node, const TExprNode& row, const TPredicateExtractorSettings& settings, TExprContext& ctx) {
+ if (settings.IsValidForRange && !settings.IsValidForRange(node)) {
+ return false;
+ }
auto it = SupportedBinOps.find(node->Content());
if (it != SupportedBinOps.end()) {
if (IsValidForRange(node->Head(), &node->Tail(), row)) {
@@ -1340,10 +1343,268 @@ TExprNode::TPtr BuildRangeMultiply(TPositionHandle pos, size_t maxRanges, const
return ctx.NewCallable(pos, "RangeMultiply", std::move(args));
}
+// Short local names for the literal range (both sides) and for a single side of it.
+using TRangeHint = IPredicateRangeExtractor::TBuildResult::TLiteralRange;
+using TRangeBoundHint = IPredicateRangeExtractor::TBuildResult::TLiteralRange::TLiteralRangeBound;
+
+// Best-effort static ordering of two bound expressions.
+// Yields 0 for the very same node or for two statically-NULL expressions, -1 / 1 when
+// only the first / only the second is statically NULL, and an empty result when either
+// node is missing or the order cannot be decided without evaluating the expressions.
+TMaybe<int> TryCompareColumns(const TExprNode::TPtr& fs, const TExprNode::TPtr& sc) {
+    if (!fs || !sc) {
+        return {};
+    }
+    if (fs == sc) {
+        return 0;
+    }
+
+    // "Statically NULL": a Nothing callable, or anything annotated with the Null type.
+    const auto isStaticNull = [](const TExprNode::TPtr& node) {
+        if (node->IsCallable("Nothing")) {
+            return true;
+        }
+        const auto* annotation = node->GetTypeAnn();
+        return annotation && annotation->GetKind() == ETypeAnnotationKind::Null;
+    };
+
+    const bool firstIsNull = isStaticNull(fs);
+    const bool secondIsNull = isStaticNull(sc);
+    if (firstIsNull) {
+        return secondIsNull ? 0 : -1;
+    }
+    if (secondIsNull) {
+        return 1;
+    }
+
+    return {};
+}
+
+// Selects one of two bounds of the same side, walking their columns in parallel.
+// min   - pick the smaller bound when true, the larger one otherwise.
+// lefts - true when both arguments are left (lower) bounds, false for right bounds.
+// Returns Nothing when a compared column is null or TryCompareColumns cannot order a pair.
+TMaybe<TRangeBoundHint> CompareBounds(
+ const TRangeBoundHint& hint1,
+ const TRangeBoundHint& hint2,
+ bool min, bool lefts)
+{
+ TRangeBoundHint hint;
+ // True for (min, lefts) = (true, true) and (false, false) - the combinations RangeHintUnion passes;
+ // RangeHintIntersect passes the two combinations for which this is false.
+ bool uniteAreas = min == lefts;
+ for (size_t i = 0; ; i++) {
+ if (i >= hint1.Columns.size()) {
+ if (i >= hint2.Columns.size()) {
+ // Both column lists ended together: the bounds differ at most in inclusiveness.
+ hint = hint1;
+ if (uniteAreas) {
+ hint.Inclusive = hint1.Inclusive || hint2.Inclusive;
+ } else {
+ hint.Inclusive = hint1.Inclusive && hint2.Inclusive;
+ }
+ } else if (hint1.Inclusive != uniteAreas) {
+ // hint1 is a strict prefix of hint2; it wins only when its Inclusive flag equals uniteAreas.
+ hint = hint2;
+ } else {
+ hint = hint1;
+ }
+ break;
+ }
+ if (i >= hint2.Columns.size()) {
+ // Mirror case: hint2 is a strict prefix of hint1.
+ if (hint2.Inclusive != uniteAreas) {
+ hint = hint1;
+ } else {
+ hint = hint2;
+ }
+ break;
+ }
+
+ if (!hint1.Columns[i] || !hint2.Columns[i]) {
+ return Nothing();
+ }
+ if (auto cmp = TryCompareColumns(hint1.Columns[i], hint2.Columns[i])) {
+ if ((cmp < 0) == min) {
+ hint = hint1;
+ } else {
+ hint = hint2;
+ }
+
+ // On equality keep scanning: the choice above is overwritten by a later column or by the end-of-list cases.
+ if (cmp != 0) {
+ break;
+ }
+ } else {
+ return Nothing();
+ }
+ }
+
+ return hint;
+}
+
+// Intersection of two literal ranges: the larger of the left bounds paired with the
+// smaller of the right bounds. Empty when either bound could not be selected.
+TMaybe<TRangeHint> RangeHintIntersect(const TRangeHint& hint1, const TRangeHint& hint2) {
+    auto lower = CompareBounds(hint1.Left, hint2.Left, /* min */ false, true);
+    auto upper = CompareBounds(hint1.Right, hint2.Right, /* min */ true, false);
+    if (!lower || !upper) {
+        return Nothing();
+    }
+    return TRangeHint{.Left = std::move(*lower), .Right = std::move(*upper)};
+}
+
+// Optional-aware wrapper: the intersection is known only when both operands are known.
+TMaybe<TRangeHint> RangeHintIntersect(const TMaybe<TRangeHint>& hint1, const TMaybe<TRangeHint>& hint2) {
+    if (!hint1 || !hint2) {
+        return {};
+    }
+    return RangeHintIntersect(*hint1, *hint2);
+}
+
+
+// Appends the bounds of hint2 (a range over the following key columns) to hint1.
+// A side is extended only when it is inclusive and already spans all hint1Len columns;
+// the extended side takes over the inclusiveness of the appended bound.
+TRangeHint RangeHintExtend(const TRangeHint& hint1, size_t hint1Len, const TRangeHint& hint2) {
+    TRangeHint extended = hint1;
+
+    const auto appendSuffix = [hint1Len](auto& bound, const auto& suffix) {
+        if (bound.Columns.size() != hint1Len || !bound.Inclusive) {
+            return;
+        }
+        bound.Columns.insert(bound.Columns.end(), suffix.Columns.begin(), suffix.Columns.end());
+        bound.Inclusive = suffix.Inclusive;
+    };
+
+    appendSuffix(extended.Left, hint2.Left);
+    appendSuffix(extended.Right, hint2.Right);
+    return extended;
+}
+
+// Optional-aware wrapper: the extension is known only when both operands are known.
+TMaybe<TRangeHint> RangeHintExtend(const TMaybe<TRangeHint>& hint1, size_t hint1Len, const TMaybe<TRangeHint>& hint2) {
+    if (!hint1 || !hint2) {
+        return {};
+    }
+    return RangeHintExtend(*hint1, hint1Len, *hint2);
+}
+
+// Checks whether `left` can precede `right`, i.e. whether the range they delimit may be non-empty.
+// Columns are compared pairwise with TryCompareColumns:
+//  - a pair that cannot be ordered statically makes the range invalid (conservative answer);
+//  - the first strictly ordered pair decides the result;
+//  - if one bound runs out of columns before the other (or both are empty), that side is
+//    treated as +-inf and the range is accepted;
+//  - if both bounds list exactly the same columns, they name a single point. The point is
+//    accepted when acceptExclusivePoint is set or at least one side is inclusive.
+// The last case used to be unreachable: the end-of-columns check inside the loop returned true
+// first, so callers passing acceptExclusivePoint = false never got the stricter answer.
+bool IsValid(const TRangeBoundHint& left, const TRangeBoundHint& right, bool acceptExclusivePoint = true) {
+    const size_t leftLen = left.Columns.size();
+    const size_t rightLen = right.Columns.size();
+    const size_t commonLen = std::min(leftLen, rightLen);
+
+    for (size_t i = 0; i < commonLen; ++i) {
+        auto cmp = TryCompareColumns(left.Columns[i], right.Columns[i]);
+        if (!cmp) {
+            return false;
+        }
+        if (*cmp < 0) {
+            return true;
+        }
+        if (*cmp > 0) {
+            return false;
+        }
+    }
+
+    if (leftLen != rightLen || commonLen == 0) {
+        // ok, we have +-inf and sure that it's valid
+        return true;
+    }
+
+    // Same non-empty column list on both sides: a single point.
+    return acceptExclusivePoint || left.Inclusive || right.Inclusive;
+}
+
+// Union of two literal ranges as a single range: the smaller of the left bounds paired with
+// the larger of the right bounds. The result is produced only when both inputs pass IsValid,
+// all bounds can be selected, and the intersection of the inputs passes
+// IsValid(..., /* acceptExclusivePoint */ false).
+TMaybe<TRangeHint> RangeHintUnion(const TRangeHint& hint1, const TRangeHint& hint2) {
+    const bool inputsValid = IsValid(hint1.Left, hint1.Right) && IsValid(hint2.Left, hint2.Right);
+    if (!inputsValid) {
+        return Nothing();
+    }
+
+    auto lower = CompareBounds(hint1.Left, hint2.Left, /* min */ true, true);
+    auto upper = CompareBounds(hint1.Right, hint2.Right, /* min */ false, false);
+    auto overlap = RangeHintIntersect(hint1, hint2);
+    if (!lower || !upper || !overlap) {
+        return Nothing();
+    }
+
+    if (!IsValid(overlap->Left, overlap->Right, false)) {
+        return Nothing();
+    }
+    return TRangeHint{.Left = std::move(*lower), .Right = std::move(*upper)};
+}
+
+// Optional-aware wrapper: the union is known only when both operands are known.
+TMaybe<TRangeHint> RangeHintUnion(const TMaybe<TRangeHint>& hint1, const TMaybe<TRangeHint>& hint2) {
+    if (!hint1 || !hint2) {
+        return {};
+    }
+    return RangeHintUnion(*hint1, *hint2);
+}
+
+// Tries to describe a single range node as a literal range over the key column indexKeys[indexRange.Begin].
+// On success `hint` is constructed in place; for unrecognized operations, or when the compared value
+// is annotated as Optional/Null, `hint` is left as the caller passed it.
+// ctx is used to build Nothing nodes for optional key columns.
+void TryBuildSingleRangeHint(TExprNode::TPtr range, const TStructExprType& rowType, const TVector<TString>& indexKeys, TIndexRange indexRange, TMaybe<TRangeHint>& hint, TExprContext& ctx) {
+ // NOTE(review): presumably always assigned by GetOpFromRange before it is read - confirm.
+ bool negated;
+ auto op = GetOpFromRange(*range, negated);
+ // Only used in the sanity checks below.
+ size_t rangeLen = indexRange.End - indexRange.Begin;
+
+ auto idx = rowType.FindItem(indexKeys[indexRange.Begin]);
+ YQL_ENSURE(idx);
+ const TTypeAnnotationNode* firstKeyType = rowType.GetItems()[*idx]->GetItemType();
+
+ // True when the node's type annotation is Optional or Null.
+ auto isOptional = [&](const TExprNode::TPtr& node) {
+ YQL_ENSURE(node->GetTypeAnn());
+ return node->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional || node->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Null;
+ };
+
+ if (op->IsCallable("SqlIn") && !negated) {
+ TCoSqlIn sqlIn(op);
+ auto collection = sqlIn.Collection();
+ // Only a single-element collection is handled; it is turned into the point [item, item].
+ if ((collection.Ptr()->IsCallable({"AsList", "AsSet", "Just"}) ||
+ collection.Ptr()->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Tuple) &&
+ GetSqlInCollectionSize(collection.Ptr()) == TMaybe<size_t>(1))
+ {
+ // NOTE(review): assumes child 0 of the collection node is the element itself - confirm for tuple-typed collections.
+ auto item = sqlIn.Collection().Ptr()->Child(0);
+ if (isOptional(item)) {
+ return;
+ }
+
+ hint.ConstructInPlace();
+ hint->Left.Inclusive = hint->Right.Inclusive = true;
+ hint->Left.Columns = hint->Right.Columns = {item};
+ }
+ } else if (op->IsCallable(">") || op->IsCallable(">=")) {
+ YQL_ENSURE(!negated);
+ if (isOptional(op->ChildPtr(1))) {
+ return;
+ }
+
+ // Lower bound only: the right side keeps an empty column list and is marked inclusive.
+ hint.ConstructInPlace();
+ hint->Left.Inclusive = op->IsCallable(">=");
+ hint->Right.Inclusive = true;
+ YQL_ENSURE(rangeLen == 1);
+ hint->Left.Columns.push_back(op->ChildPtr(1));
+ } else if (op->IsCallable("<") || op->IsCallable("<=")) {
+ YQL_ENSURE(!negated);
+ if (isOptional(op->ChildPtr(1))) {
+ return;
+ }
+
+ hint.ConstructInPlace();
+ hint->Right.Inclusive = op->IsCallable("<=");
+
+ YQL_ENSURE(rangeLen == 1);
+ hint->Right.Columns.push_back(op->ChildPtr(1));
+
+ // For an optional key column the left bound is an exclusive Nothing
+ // (presumably to keep NULL keys out of the range); otherwise it has no columns.
+ if (firstKeyType->GetKind() == ETypeAnnotationKind::Optional) {
+ auto none = Build<TCoNothing>(ctx, op->Pos())
+ .OptionalType(ExpandType(op->Pos(), *firstKeyType, ctx))
+ .Done();
+ hint->Left.Columns.push_back(none.Ptr());
+ hint->Left.Inclusive = false;
+ } else {
+ hint->Left.Inclusive = true;
+ }
+ } else if (op->IsCallable("==")) {
+ YQL_ENSURE(!negated);
+ if (isOptional(op->ChildPtr(1))) {
+ return;
+ }
+
+ // Equality is the point [value, value].
+ hint.ConstructInPlace();
+ hint->Left.Inclusive = hint->Right.Inclusive = true;
+ hint->Left.Columns = hint->Right.Columns = {op->ChildPtr(1)};
+ } else if (op->IsCallable("Exists")) {
+ YQL_ENSURE(rangeLen == 1);
+ hint.ConstructInPlace();
+ auto none = Build<TCoNothing>(ctx, op->Pos())
+ .OptionalType(ExpandType(op->Pos(), *firstKeyType, ctx))
+ .Done();
+ if (negated) {
+ // Negated Exists: the point [Nothing, Nothing].
+ hint->Left.Inclusive = hint->Right.Inclusive = true;
+ hint->Left.Columns.push_back(none.Ptr());
+ hint->Right.Columns.push_back(none.Ptr());
+ } else {
+ // Exists: exclusive Nothing on the left, no columns on the right.
+ hint->Left.Inclusive = false;
+ hint->Left.Columns.push_back(none.Ptr());
+ hint->Right.Inclusive = true;
+ }
+ }
+}
+
TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, const TExprNode::TPtr& range,
const TVector<TString>& indexKeys, const THashMap<TString, size_t>& indexKeysOrder,
TExprNode::TPtr& prunedRange, TIndexRange& resultIndexRange, const TPredicateExtractorSettings& settings,
- size_t usedPrefixLen, TExprContext& ctx)
+ size_t usedPrefixLen, TExprContext& ctx, TMaybe<TRangeHint>& hint)
{
prunedRange = {};
resultIndexRange = {};
@@ -1357,6 +1618,9 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
prunedRange = (rawCols.size() == cols.size()) ?
BuildRestTrue(pos, rowType, ctx) :
RebuildAsRangeRest(rowType, *range, ctx);
+ if (settings.BuildLiteralRange) {
+ TryBuildSingleRangeHint(range, rowType, indexKeys, resultIndexRange, hint, ctx);
+ }
YQL_ENSURE(usedPrefixLen > 0 && usedPrefixLen <= indexKeys.size());
return BuildSingleComputeRange(rowType, *range, indexKeysOrder, settings, indexKeys[usedPrefixLen - 1], ctx);
}
@@ -1389,15 +1653,18 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
for (auto& child : range->ChildrenList()) {
prunedOutput.emplace_back();
TIndexRange childIndexRange;
- output.push_back(DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx));
+ TMaybe<TRangeHint> childHint;
+ output.push_back(DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx, childHint));
childIndexRanges.push_back(childIndexRange);
YQL_ENSURE(!childIndexRange.IsEmpty());
if (resultIndexRange.IsEmpty()) {
resultIndexRange = childIndexRange;
+ hint = childHint;
} else {
YQL_ENSURE(childIndexRange.Begin == resultIndexRange.Begin);
resultIndexRange.End = std::max(resultIndexRange.End, childIndexRange.End);
resultIndexRange.PointPrefixLen = std::min(resultIndexRange.PointPrefixLen, childIndexRange.PointPrefixLen);
+ hint = RangeHintUnion(childHint, hint);
}
}
@@ -1414,7 +1681,8 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
for (const auto& child : range->ChildrenList()) {
prunedOutput.emplace_back();
TIndexRange childIndexRange;
- auto compute = DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx);
+ TMaybe<TRangeHint> childHint;
+ auto compute = DoBuildMultiColumnComputeNode(rowType, child, indexKeys, indexKeysOrder, prunedOutput.back(), childIndexRange, settings, usedPrefixLen, ctx, childHint);
if (!compute) {
continue;
}
@@ -1423,9 +1691,11 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
YQL_ENSURE(!childIndexRange.IsEmpty());
if (resultIndexRange.IsEmpty()) {
resultIndexRange = childIndexRange;
+ hint = childHint;
} else {
if (childIndexRange.Begin != resultIndexRange.Begin) {
YQL_ENSURE(childIndexRange.Begin == resultIndexRange.End);
+ hint = RangeHintExtend(hint, resultIndexRange.End - resultIndexRange.Begin, childHint);
needAlign = false;
if (!resultIndexRange.IsPoint()) {
prunedOutput.back() = RebuildAsRangeRest(rowType, *child, ctx);
@@ -1434,6 +1704,7 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
}
} else {
resultIndexRange.PointPrefixLen = std::max(resultIndexRange.PointPrefixLen, childIndexRange.PointPrefixLen);
+ hint = RangeHintIntersect(hint, childHint);
}
resultIndexRange.End = std::max(resultIndexRange.End, childIndexRange.End);
}
@@ -1456,7 +1727,7 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
prunedOutput.erase(
std::remove_if(prunedOutput.begin(), prunedOutput.end(), [](const auto& pruned) { return IsRestTrue(*pruned); }),
prunedOutput.end()
- );
+ );
if (prunedOutput.empty()) {
prunedRange = BuildRestTrue(pos, rowType, ctx);
@@ -1469,12 +1740,70 @@ TExprNode::TPtr DoBuildMultiColumnComputeNode(const TStructExprType& rowType, co
return ctx.NewCallable(pos, range->IsCallable("RangeOr") ? "RangeUnion" : "RangeIntersect", std::move(output));
}
+// Converts every bound expression of `hint` to the type of the key column it is matched with
+// (column i of a bound is matched with indexKeys[i]). The hint is cleared if any conversion fails.
+// A missing hint is left untouched.
+void NormalizeRangeHint(TMaybe<TRangeHint>& hint, const TVector<TString>& indexKeys, const TStructExprType& rowType, TExprContext& ctx, TTypeAnnotationContext& types) {
+ if (!hint) {
+ return;
+ }
+
+ // Converts all columns of one bound in place; returns false on the first failed transform.
+ // Note: the lambda parameter shadows the outer `hint`.
+ auto normTypes = [&] (TRangeBoundHint& hint) {
+ for (size_t i = 0; i < hint.Columns.size(); ++i) {
+ auto idx = rowType.FindItem(indexKeys[i]);
+ YQL_ENSURE(idx);
+ const TTypeAnnotationNode* columnType = rowType.GetItems()[*idx]->GetItemType();
+ const TTypeAnnotationNode* unwrapOptional = columnType;
+
+ if (columnType->GetKind() == ETypeAnnotationKind::Optional) {
+ unwrapOptional = columnType->Cast<TOptionalExprType>()->GetItemType();
+ }
+
+ // A fresh pipeline is built for every column because the functor captures the per-column types.
+ TTransformationPipeline pipeline(&types);
+ pipeline.AddServiceTransformers();
+ pipeline.AddTypeAnnotationTransformer();
+ pipeline.Add(CreateFunctorTransformer(
+ [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus {
+ output = input;
+
+ // Try the non-optional item type first; on Error retry from the original node with the column type itself.
+ auto status = TrySilentConvertTo(output, *unwrapOptional, ctx);
+ if (status == IGraphTransformer::TStatus::Error) {
+ output = input;
+ status = TrySilentConvertTo(output, *columnType, ctx);
+ }
+
+ if (status == IGraphTransformer::TStatus::Repeat) {
+ status.HasRestart = 1;
+ }
+ return status;
+ }
+ ), "ExtractPredicate", TIssuesIds::CORE_EXEC);
+
+ auto transformer = pipeline.BuildWithNoArgChecks(true);
+
+ // Re-run the transform until it reports Ok; Error aborts normalization of this bound.
+ for (;;) {
+ auto status = InstantTransform(*transformer, hint.Columns[i], ctx, true);
+ if (status == IGraphTransformer::TStatus::Ok) {
+ break;
+ }
+ if (status == IGraphTransformer::TStatus::Error) {
+ return false;
+ }
+ }
+ }
+ return true;
+ };
+
+ if (!normTypes(hint->Left) || !normTypes(hint->Right)) {
+ hint.Clear();
+ }
+}
+
TExprNode::TPtr BuildMultiColumnComputeNode(const TStructExprType& rowType, const TExprNode::TPtr& range,
const TVector<TString>& indexKeys, const THashMap<TString, size_t>& indexKeysOrder,
- TExprNode::TPtr& prunedRange, const TPredicateExtractorSettings& settings, size_t usedPrefixLen, size_t& pointPrefixLen, TExprContext& ctx)
+ TExprNode::TPtr& prunedRange, const TPredicateExtractorSettings& settings, size_t usedPrefixLen, size_t& pointPrefixLen,
+ TExprContext& ctx, TTypeAnnotationContext& types, TMaybe<TRangeHint>& resultHint)
{
TIndexRange resultIndexRange;
- auto result = DoBuildMultiColumnComputeNode(rowType, range, indexKeys, indexKeysOrder, prunedRange, resultIndexRange, settings, usedPrefixLen, ctx);
+ auto result = DoBuildMultiColumnComputeNode(rowType, range, indexKeys, indexKeysOrder, prunedRange, resultIndexRange, settings, usedPrefixLen, ctx, resultHint);
+ NormalizeRangeHint(resultHint, indexKeys, rowType, ctx, types);
pointPrefixLen = resultIndexRange.PointPrefixLen;
YQL_ENSURE(pointPrefixLen <= usedPrefixLen);
YQL_ENSURE(prunedRange);
@@ -1505,6 +1834,62 @@ TExprNode::TPtr BuildMultiColumnComputeNode(const TStructExprType& rowType, cons
return result;
}
+// Builds Map(node, rangeArg -> AsStruct(...)) with one struct member per entry of keyColumns.
+// For key column i the member value is Unwrap(Nth(Nth(rangeArg, 0), i)), guarded by an Ensure
+// that compares it with the same component taken from Nth(rangeArg, 1).
+// NOTE(review): presumably elements of `node` are (left boundary, right boundary) tuples - confirm.
+NYql::NNodes::TExprBase UnpackRangePoints(NYql::NNodes::TExprBase node, TConstArrayRef<TString> keyColumns, NYql::TExprContext& expCtx, NYql::TPositionHandle pos) {
+ TCoArgument rangeArg = Build<TCoArgument>(expCtx, pos)
+ .Name("rangeArg")
+ .Done();
+
+ TVector<TExprBase> structMembers;
+ structMembers.reserve(keyColumns.size());
+ for (size_t i = 0; i < keyColumns.size(); ++i) {
+ // Component i of element k of the range tuple, unwrapped.
+ auto kth = [&] (size_t k) {
+ return Build<TCoUnwrap>(expCtx, pos)
+ .Optional<TCoNth>()
+ .Tuple<TCoNth>()
+ .Tuple(rangeArg)
+ .Index().Build(k)
+ .Build()
+ .Index().Build(i)
+ .Build()
+ .Done();
+ };
+
+ auto first = kth(0);
+ auto second = kth(1);
+
+ // The member takes the value from element 0; the Ensure passes when both elements
+ // are equal or when neither of them exists.
+ auto member = Build<TCoNameValueTuple>(expCtx, pos)
+ .Name().Build(keyColumns[i])
+ .Value<TCoEnsure>()
+ .Value(first)
+ .Message<TCoString>().Literal().Build("invalid range bounds").Build()
+ .Predicate<TCoOr>()
+ .Add<TCoCmpEqual>()
+ .Left(first)
+ .Right(second)
+ .Build()
+ .Add<TCoAnd>()
+ .Add<TCoNot>().Value<TCoExists>().Optional(first).Build().Build()
+ .Add<TCoNot>().Value<TCoExists>().Optional(second).Build().Build()
+ .Build()
+ .Build()
+ .Build()
+ .Done();
+
+ structMembers.push_back(member);
+ }
+
+
+ return Build<TCoMap>(expCtx, pos)
+ .Input(node)
+ .Lambda()
+ .Args({rangeArg})
+ .Body<TCoAsStruct>()
+ .Add(structMembers)
+ .Build()
+ .Build()
+ .Done();
+}
+
} // namespace
@@ -1570,7 +1955,7 @@ bool TPredicateRangeExtractor::Prepare(const TExprNode::TPtr& filterLambdaNode,
}
TPredicateRangeExtractor::TBuildResult TPredicateRangeExtractor::BuildComputeNode(const TVector<TString>& indexKeys,
- TExprContext& ctx) const
+ TExprContext& ctx, TTypeAnnotationContext& typesCtx) const
{
YQL_ENSURE(FilterLambda && Range && RowType, "Prepare() is not called");
@@ -1608,11 +1993,11 @@ TPredicateRangeExtractor::TBuildResult TPredicateRangeExtractor::BuildComputeNod
TExprNode::TPtr rebuiltRange = RebuildRangeForIndexKeys(*RowType, Range, indexKeysOrder, result.UsedPrefixLen, ctx);
TExprNode::TPtr prunedRange;
result.ComputeNode = BuildMultiColumnComputeNode(*RowType, rebuiltRange, effectiveIndexKeys, indexKeysOrder,
- prunedRange, Settings, result.UsedPrefixLen, result.PointPrefixLen, ctx);
+ prunedRange, Settings, result.UsedPrefixLen, result.PointPrefixLen, ctx, typesCtx, result.LiteralRange);
+
if (result.ComputeNode) {
result.ExpectedMaxRanges = CalcMaxRanges(rebuiltRange, indexKeysOrder);
if (result.ExpectedMaxRanges && *result.ExpectedMaxRanges < Settings.MaxRanges) {
- // rebuild filter lambda with prunedRange predicate
TCoLambda lambda(result.PrunedLambda);
auto newPred = MakePredicateFromPrunedRange(prunedRange, lambda.Args().Arg(0).Ptr(), ctx);
@@ -1635,4 +2020,9 @@ IPredicateRangeExtractor::TPtr MakePredicateRangeExtractor(const TPredicateExtra
return MakeHolder<NDetail::TPredicateRangeExtractor>(settings);
}
+
+// Turns the ranges computed by result.ComputeNode into a list of key structs
+// (one member per entry of keyColumns) by delegating to NDetail::UnpackRangePoints.
+// result.ComputeNode must be set: BuildComputeNode may leave it empty when ranges
+// cannot be built, and that case is reported explicitly here instead of
+// dereferencing a null node.
+TExprNode::TPtr BuildPointsList(const IPredicateRangeExtractor::TBuildResult& result, TConstArrayRef<TString> keyColumns, NYql::TExprContext& expCtx) {
+    YQL_ENSURE(result.ComputeNode, "BuildPointsList: compute node is not built");
+    // Read the position up front so it does not depend on argument evaluation order.
+    const auto pos = result.ComputeNode->Pos();
+    return NDetail::UnpackRangePoints(NNodes::TExprBase(result.ComputeNode), keyColumns, expCtx, pos).Ptr();
+}
+
} // namespace NYql
diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h
index 8e2d693f722..68524c25d62 100644
--- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h
+++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.h
@@ -17,7 +17,7 @@ public:
return Range;
}
- TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx) const override final;
+ TBuildResult BuildComputeNode(const TVector<TString>& indexKeys, TExprContext& ctx, TTypeAnnotationContext& typesCtx) const override final;
private:
const TPredicateExtractorSettings Settings;
TExprNode::TPtr FilterLambda;
diff --git a/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan b/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan
index 830f2d7cf40..c6b7cadafb9 100644
--- a/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan
+++ b/ydb/tests/functional/suite_tests/canondata/test_sql_logic.TestSQLLogic.test_sql_suite_plan-select2-4.test_/query_100.plan
@@ -42,7 +42,7 @@
"b",
"d"
],
- "ReadRangesExpectedSize": "1",
+ "ReadRangesExpectedSize": 1,
"ReadRangesKeys": [
"a"
],