aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorssmike <ssmike@ydb.tech>2023-09-27 11:53:22 +0300
committerssmike <ssmike@ydb.tech>2023-09-27 12:52:22 +0300
commit8ab67a498f8ce3c89b63b3683c039b52a6de8848 (patch)
tree5eccba5151c3dc2ffad27a8e82d167b512d63d76
parentbbe7025e8e7dd949d7a5d78dd0e794ce6d161033 (diff)
downloadydb-8ab67a498f8ce3c89b63b3683c039b52a6de8848.tar.gz
Improve predicate conjunction handling
-rw-r--r--ydb/core/kqp/ut/opt/kqp_ne_ut.cpp4
-rw-r--r--ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp85
-rw-r--r--ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp97
3 files changed, 151 insertions, 35 deletions
diff --git a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp
index d03aa1f721b..421bd3aa411 100644
--- a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp
+++ b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp
@@ -3919,10 +3919,6 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) {
Y_UNIT_TEST_TWIN(ComplexLookupLimit, NewPredicateExtract) {
- if (NewPredicateExtract) {
- return;
- }
-
TKikimrSettings settings;
NKikimrConfig::TAppConfig appConfig;
appConfig.MutableTableServiceConfig()->SetPredicateExtract20(NewPredicateExtract);
diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
index 1b09e09974a..f38379da18e 100644
--- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
+++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
@@ -1207,48 +1207,72 @@ TExprNode::TPtr DoRebuildRangeForIndexKeys(const TStructExprType& rowType, const
}
TVector<TNodeAndIndexRange> rests;
- TMap<TIndexRange, TVector<TNodeAndIndexRange>> children;
+ Sort(toRebuild.begin(), toRebuild.end(),
+ [&](const auto& fs, const auto& sc) {
+ return fs.IndexRange < sc.IndexRange;
+ });
+ TMap<size_t, TVector<TNodeAndIndexRange>> byBegin;
+ if (!toRebuild.empty()) {
+ byBegin[toRebuild[0].IndexRange.Begin] = {};
+ }
+
for (auto& current : toRebuild) {
if (current.IndexRange.IsEmpty()) {
YQL_ENSURE(current.Node->IsCallable("RangeRest"));
rests.emplace_back(std::move(current));
} else {
- children[current.IndexRange].push_back(std::move(current));
+ if (byBegin.contains(current.IndexRange.Begin)) {
+ byBegin[current.IndexRange.Begin].push_back(current);
+ byBegin[current.IndexRange.End] = {};
+ } else {
+ rests.push_back(std::move(current));
+ }
}
}
- TVector<TVector<TNodeAndIndexRange>> childrenChains;
- for (auto it = children.begin(); it != children.end(); ++it) {
- if (!commonIndexRange) {
- commonIndexRange = it->first;
- childrenChains.emplace_back(std::move(it->second));
- continue;
- }
- if (commonIndexRange->Begin == it->first.Begin) {
- YQL_ENSURE(it->first.End > commonIndexRange->End);
- for (auto& asRest : childrenChains) {
- rests.insert(rests.end(), asRest.begin(), asRest.end());
+ TMap<size_t, TNodeAndIndexRange> builtRange;
+ for (auto it = byBegin.rbegin(); it != byBegin.rend(); ++it) {
+ size_t end = 0;
+ size_t indexRangeEnd = 0;
+ TVector<TExprNode::TPtr> results;
+ TVector<TExprNode::TPtr> currents;
+ auto flush = [&] () {
+ if (!currents) {
+ return;
}
- childrenChains.clear();
- childrenChains.push_back(std::move(it->second));
- commonIndexRange = it->first;
- continue;
- }
- if (commonIndexRange->End == it->first.Begin) {
- commonIndexRange->End = it->first.End;
- childrenChains.push_back(std::move(it->second));
- } else {
- rests.insert(rests.end(), it->second.begin(), it->second.end());
+ TExprNode::TPtr toAdd = MakeRangeAnd(range->Pos(), std::move(currents), ctx);
+ if (auto ptr = builtRange.FindPtr(end)) {
+ toAdd = MakeRangeAnd(range->Pos(), { toAdd, ptr->Node }, ctx);
+ indexRangeEnd = std::max(indexRangeEnd, ptr->IndexRange.End);
+ }
+ results.push_back(toAdd);
+
+ indexRangeEnd = std::max(end, indexRangeEnd);
+ currents = {};
+ };
+ for (auto node : it->second) {
+ if (end != node.IndexRange.End) {
+ flush();
+ end = node.IndexRange.End;
+ }
+ currents.push_back(node.Node);
+ }
+ if (currents) {
+ flush();
+ }
+ if (results) {
+ auto& built = builtRange[it->first];
+ built.Node = MakeRangeAnd(range->Pos(), std::move(results), ctx);
+ built.IndexRange.Begin = it->first;
+ built.IndexRange.End = indexRangeEnd;
}
}
- for (auto& chain : childrenChains) {
- TExprNodeList chainNodes;
- for (auto& entry : chain) {
- chainNodes.push_back(entry.Node);
- }
- rebuilt.push_back(MakeRangeAnd(range->Pos(), std::move(chainNodes), ctx));
+ if (builtRange) {
+ auto& first = *builtRange.begin();
+ rebuilt.push_back(first.second.Node);
+ commonIndexRange = first.second.IndexRange;
}
if (!rests.empty()) {
@@ -1264,7 +1288,7 @@ TExprNode::TPtr DoRebuildRangeForIndexKeys(const TStructExprType& rowType, const
}
}
- TExprNode::TPtr result = ctx.ChangeChildren(*range, std::move(rebuilt));
+ TExprNode::TPtr result = rebuilt.size() == 1 ? rebuilt[0] : ctx.ChangeChildren(*range, std::move(rebuilt));
if (commonIndexRange) {
resultIndexRange = *commonIndexRange;
} else {
@@ -2018,6 +2042,7 @@ bool TPredicateRangeExtractor::Prepare(const TExprNode::TPtr& filterLambdaNode,
}
return node;
}, ctx, settings);
+
return status == IGraphTransformer::TStatus::Ok;
}
diff --git a/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp b/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp
index 9c83da1568c..acc641da97f 100644
--- a/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp
+++ b/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp
@@ -587,7 +587,7 @@ Y_UNIT_TEST_SUITE(TYqlExtractPredicate) {
"(let $4 (RangeFor '>= (Int32 '0) $2))\n"
"(let $5 (RangeFor '< (Int32 '\"-10\") $2))\n"
"(let $6 '((Nothing (OptionalType $2)) (Int32 '0)))\n"
- "(return (RangeFinalize (RangeMultiply (Uint64 '10000) (RangeUnion (RangeMultiply (Uint64 '10000) (RangeUnion (RangeIntersect (RangeIntersect $3 $4)) $5) (AsRange '($6 $6)))))))\n"
+ "(return (RangeFinalize (RangeMultiply (Uint64 '10000) (RangeUnion (RangeMultiply (Uint64 '10000) (RangeUnion (RangeIntersect $3 $4) $5) (AsRange '($6 $6)))))))\n"
")\n";
TExprContext exprCtx;
@@ -971,6 +971,101 @@ Y_UNIT_TEST_SUITE(TYqlExtractPredicate) {
UNIT_ASSERT_EQUAL(lambda, canonicalLambda);
}
+
+ Y_UNIT_TEST(LookupAnd) {
+ TString prog = GetNonOptionsSrc4() +
+ "insert into Output with truncate\n"
+ "select * from as_table($src) where (x, y) > (1, 2) and x in [0, 1, 2, 3];";
+
+ TExprContext exprCtx;
+ TTypeAnnotationContextPtr typesCtx;
+ TExprNode::TPtr exprRoot = ParseAndOptimize(prog, exprCtx, typesCtx);
+ TExprNode::TPtr filterLambda = LocateFilterLambda(exprRoot);
+
+ THashSet<TString> usedColumns;
+ using NDetail::TPredicateRangeExtractor;
+
+ auto extractor = MakePredicateRangeExtractor({});
+ UNIT_ASSERT(extractor->Prepare(filterLambda, *filterLambda->Head().Head().GetTypeAnn(), usedColumns, exprCtx, *typesCtx));
+
+ auto buildResult = extractor->BuildComputeNode({ "x", "y" }, exprCtx, *typesCtx);
+ UNIT_ASSERT(buildResult.ComputeNode);
+
+ auto canonicalRanges =
+ "(\n"
+ "(let $1 (Int32 '1))\n"
+ "(let $2 (Int32 '\"2\"))\n"
+ "(let $3 (DataType 'Int32))\n"
+ "(let $4 (OptionalType $3))\n"
+ "(let $5 (IfPresent (Map (Just (AsList (Int32 '0) $1 $2 (Int32 '\"3\"))) (lambda '($13) $13)) (lambda '($14) (block '(\n"
+ " (let $15 (Collect (Take (FlatMap $14 (lambda '($17) (block '(\n"
+ " (let $18 (RangeFor '== $17 $3))\n"
+ " (return (RangeMultiply (Uint64 '10000) $18))\n"
+ " )))) (Uint64 '10001))))\n"
+ " (let $16 '((Nothing $4) (Int32 '0)))\n"
+ " (return (IfStrict (> (Length $15) (Uint64 '10000)) (AsRange '($16 $16)) (RangeUnion $15)))\n"
+ "))) (RangeEmpty $3)))\n"
+ "(let $6 '((Nothing $4) (Int32 '0)))\n"
+ "(let $7 (RangeMultiply (Uint64 '10000) $5 (AsRange '($6 $6))))\n"
+ "(let $8 (RangeFor '> $1 $3))\n"
+ "(let $9 '((Nothing $4) (Int32 '0)))\n"
+ "(let $10 (RangeMultiply (Uint64 '10000) $8 (AsRange '($9 $9))))\n"
+ "(let $11 (RangeFor '== $1 $3))\n"
+ "(let $12 (RangeFor '> $2 $3))\n"
+ "(return (RangeFinalize (RangeMultiply (Uint64 '10000) (RangeUnion (RangeIntersect $7 (RangeUnion $10 (RangeIntersect (RangeMultiply (Uint64 '10000) $11 $12))))))))\n"
+ ")\n"
+ ;
+ auto ranges = DumpNode(*buildResult.ComputeNode, exprCtx);
+ Cerr << ranges;
+
+ auto canonicalLambda =
+ "(\n"
+ "(return (lambda '($1) (OptionalIf (Bool 'true) $1)))\n"
+ ")\n";
+ auto lambda = DumpNode(*buildResult.PrunedLambda, exprCtx);
+ Cerr << lambda;
+
+ UNIT_ASSERT_EQUAL(ranges, canonicalRanges);
+ UNIT_ASSERT_EQUAL(lambda, canonicalLambda);
+
+ TString expectedExecuteResult = R"__({"Write"=[{"Data"=[[[["1"];["2"];"0"];[["1"];#;"1"]];[[["2"];#;"1"];[["2"];#;"1"]];[[["3"];#;"1"];[["3"];#;"1"]]]}]})__";
+ ExecuteAndCheckRanges(ranges, expectedExecuteResult);
+ }
+
+ Y_UNIT_TEST(MixedAnd) {
+ TString prog = GetNonOptionsSrc4() +
+ "insert into Output with truncate\n"
+ "select * from as_table($src) where (x in [1, 2]) and (x, y) in [(2, 3), (3, 4)] and (z, t) < (4, 5) and (y, z, t) > (0,0,0);";
+
+ TExprContext exprCtx;
+ TTypeAnnotationContextPtr typesCtx;
+ TExprNode::TPtr exprRoot = ParseAndOptimize(prog, exprCtx, typesCtx);
+ TExprNode::TPtr filterLambda = LocateFilterLambda(exprRoot);
+
+ THashSet<TString> usedColumns;
+ using NDetail::TPredicateRangeExtractor;
+
+ auto extractor = MakePredicateRangeExtractor({});
+ UNIT_ASSERT(extractor->Prepare(filterLambda, *filterLambda->Head().Head().GetTypeAnn(), usedColumns, exprCtx, *typesCtx));
+
+ auto buildResult = extractor->BuildComputeNode({ "x", "y", "z", "t" }, exprCtx, *typesCtx);
+ UNIT_ASSERT(buildResult.ComputeNode);
+
+ auto ranges = DumpNode(*buildResult.ComputeNode, exprCtx);
+ Cerr << ranges;
+
+ auto canonicalLambda =
+ "(\n"
+ "(return (lambda '($1) (OptionalIf (Bool 'true) $1)))\n"
+ ")\n";
+ auto lambda = DumpNode(*buildResult.PrunedLambda, exprCtx);
+ Cerr << lambda;
+
+ TString expectedExecuteResult = R"__({"Write"=[{"Data"=[[[["2"];["3"];#;#;"1"];[["2"];["3"];["4"];["5"];"0"]]]}]})__";
+ ExecuteAndCheckRanges(ranges, expectedExecuteResult);
+
+ UNIT_ASSERT_EQUAL(lambda, canonicalLambda);
+ }
}
} // namespace NYql