diff options
author | ssmike <ssmike@ydb.tech> | 2023-09-27 11:53:22 +0300 |
---|---|---|
committer | ssmike <ssmike@ydb.tech> | 2023-09-27 12:52:22 +0300 |
commit | 8ab67a498f8ce3c89b63b3683c039b52a6de8848 (patch) | |
tree | 5eccba5151c3dc2ffad27a8e82d167b512d63d76 | |
parent | bbe7025e8e7dd949d7a5d78dd0e794ce6d161033 (diff) | |
download | ydb-8ab67a498f8ce3c89b63b3683c039b52a6de8848.tar.gz |
Improve predicate conjunction handling
3 files changed, 151 insertions, 35 deletions
diff --git a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp index d03aa1f721b..421bd3aa411 100644 --- a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp @@ -3919,10 +3919,6 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { Y_UNIT_TEST_TWIN(ComplexLookupLimit, NewPredicateExtract) { - if (NewPredicateExtract) { - return; - } - TKikimrSettings settings; NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetPredicateExtract20(NewPredicateExtract); diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp index 1b09e09974a..f38379da18e 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp @@ -1207,48 +1207,72 @@ TExprNode::TPtr DoRebuildRangeForIndexKeys(const TStructExprType& rowType, const } TVector<TNodeAndIndexRange> rests; - TMap<TIndexRange, TVector<TNodeAndIndexRange>> children; + Sort(toRebuild.begin(), toRebuild.end(), + [&](const auto& fs, const auto& sc) { + return fs.IndexRange < sc.IndexRange; + }); + TMap<size_t, TVector<TNodeAndIndexRange>> byBegin; + if (!toRebuild.empty()) { + byBegin[toRebuild[0].IndexRange.Begin] = {}; + } + for (auto& current : toRebuild) { if (current.IndexRange.IsEmpty()) { YQL_ENSURE(current.Node->IsCallable("RangeRest")); rests.emplace_back(std::move(current)); } else { - children[current.IndexRange].push_back(std::move(current)); + if (byBegin.contains(current.IndexRange.Begin)) { + byBegin[current.IndexRange.Begin].push_back(current); + byBegin[current.IndexRange.End] = {}; + } else { + rests.push_back(std::move(current)); + } } } - TVector<TVector<TNodeAndIndexRange>> childrenChains; - for (auto it = children.begin(); it != children.end(); ++it) { - if (!commonIndexRange) { - commonIndexRange = it->first; - childrenChains.emplace_back(std::move(it->second)); - continue; - } - if (commonIndexRange->Begin == it->first.Begin) { - YQL_ENSURE(it->first.End > commonIndexRange->End); - for (auto& asRest : childrenChains) { - rests.insert(rests.end(), asRest.begin(), asRest.end()); + TMap<size_t, TNodeAndIndexRange> builtRange; + for (auto it = byBegin.rbegin(); it != byBegin.rend(); ++it) { + size_t end = 0; + size_t indexRangeEnd = 0; + TVector<TExprNode::TPtr> results; + TVector<TExprNode::TPtr> currents; + auto flush = [&] () { + if (!currents) { + return; } - childrenChains.clear(); - childrenChains.push_back(std::move(it->second)); - commonIndexRange = it->first; - continue; - } - if (commonIndexRange->End == it->first.Begin) { - commonIndexRange->End = it->first.End; - childrenChains.push_back(std::move(it->second)); - } else { - rests.insert(rests.end(), it->second.begin(), it->second.end()); + TExprNode::TPtr toAdd = MakeRangeAnd(range->Pos(), std::move(currents), ctx); + if (auto ptr = builtRange.FindPtr(end)) { + toAdd = MakeRangeAnd(range->Pos(), { toAdd, ptr->Node }, ctx); + indexRangeEnd = std::max(indexRangeEnd, ptr->IndexRange.End); + } + results.push_back(toAdd); + + indexRangeEnd = std::max(end, indexRangeEnd); + currents = {}; + }; + for (auto node : it->second) { + if (end != node.IndexRange.End) { + flush(); + end = node.IndexRange.End; + } + currents.push_back(node.Node); + } + if (currents) { + flush(); + } + if (results) { + auto& built = builtRange[it->first]; + built.Node = MakeRangeAnd(range->Pos(), std::move(results), ctx); + built.IndexRange.Begin = it->first; + built.IndexRange.End = indexRangeEnd; } } - for (auto& chain : childrenChains) { - TExprNodeList chainNodes; - for (auto& entry : chain) { - chainNodes.push_back(entry.Node); - } - rebuilt.push_back(MakeRangeAnd(range->Pos(), std::move(chainNodes), ctx)); + if (builtRange) { + auto& first = *builtRange.begin(); + rebuilt.push_back(first.second.Node); + commonIndexRange = first.second.IndexRange; } if (!rests.empty()) { @@ -1264,7 +1288,7 @@ TExprNode::TPtr DoRebuildRangeForIndexKeys(const TStructExprType& rowType, const } } - TExprNode::TPtr result = ctx.ChangeChildren(*range, std::move(rebuilt)); + TExprNode::TPtr result = rebuilt.size() == 1 ? rebuilt[0] : ctx.ChangeChildren(*range, std::move(rebuilt)); if (commonIndexRange) { resultIndexRange = *commonIndexRange; } else { @@ -2018,6 +2042,7 @@ bool TPredicateRangeExtractor::Prepare(const TExprNode::TPtr& filterLambdaNode, } return node; }, ctx, settings); + return status == IGraphTransformer::TStatus::Ok; } diff --git a/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp b/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp index 9c83da1568c..acc641da97f 100644 --- a/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp +++ b/ydb/library/yql/core/extract_predicate/ut/extract_predicate_ut.cpp @@ -587,7 +587,7 @@ Y_UNIT_TEST_SUITE(TYqlExtractPredicate) { "(let $4 (RangeFor '>= (Int32 '0) $2))\n" "(let $5 (RangeFor '< (Int32 '\"-10\") $2))\n" "(let $6 '((Nothing (OptionalType $2)) (Int32 '0)))\n" - "(return (RangeFinalize (RangeMultiply (Uint64 '10000) (RangeUnion (RangeMultiply (Uint64 '10000) (RangeUnion (RangeIntersect (RangeIntersect $3 $4)) $5) (AsRange '($6 $6)))))))\n" + "(return (RangeFinalize (RangeMultiply (Uint64 '10000) (RangeUnion (RangeMultiply (Uint64 '10000) (RangeUnion (RangeIntersect $3 $4) $5) (AsRange '($6 $6)))))))\n" ")\n"; TExprContext exprCtx; @@ -971,6 +971,101 @@ Y_UNIT_TEST_SUITE(TYqlExtractPredicate) { UNIT_ASSERT_EQUAL(lambda, canonicalLambda); } + + Y_UNIT_TEST(LookupAnd) { + TString prog = GetNonOptionsSrc4() + + "insert into Output with truncate\n" + "select * from as_table($src) where (x, y) > (1, 2) and x in [0, 1, 2, 3];"; + + TExprContext exprCtx; + TTypeAnnotationContextPtr typesCtx; + TExprNode::TPtr exprRoot = ParseAndOptimize(prog, exprCtx, typesCtx); + TExprNode::TPtr filterLambda = LocateFilterLambda(exprRoot); + + THashSet<TString> usedColumns; + using NDetail::TPredicateRangeExtractor; + + auto extractor = MakePredicateRangeExtractor({}); + UNIT_ASSERT(extractor->Prepare(filterLambda, *filterLambda->Head().Head().GetTypeAnn(), usedColumns, exprCtx, *typesCtx)); + + auto buildResult = extractor->BuildComputeNode({ "x", "y" }, exprCtx, *typesCtx); + UNIT_ASSERT(buildResult.ComputeNode); + + auto canonicalRanges = + "(\n" + "(let $1 (Int32 '1))\n" + "(let $2 (Int32 '\"2\"))\n" + "(let $3 (DataType 'Int32))\n" + "(let $4 (OptionalType $3))\n" + "(let $5 (IfPresent (Map (Just (AsList (Int32 '0) $1 $2 (Int32 '\"3\"))) (lambda '($13) $13)) (lambda '($14) (block '(\n" + " (let $15 (Collect (Take (FlatMap $14 (lambda '($17) (block '(\n" + " (let $18 (RangeFor '== $17 $3))\n" + " (return (RangeMultiply (Uint64 '10000) $18))\n" + " )))) (Uint64 '10001))))\n" + " (let $16 '((Nothing $4) (Int32 '0)))\n" + " (return (IfStrict (> (Length $15) (Uint64 '10000)) (AsRange '($16 $16)) (RangeUnion $15)))\n" + "))) (RangeEmpty $3)))\n" + "(let $6 '((Nothing $4) (Int32 '0)))\n" + "(let $7 (RangeMultiply (Uint64 '10000) $5 (AsRange '($6 $6))))\n" + "(let $8 (RangeFor '> $1 $3))\n" + "(let $9 '((Nothing $4) (Int32 '0)))\n" + "(let $10 (RangeMultiply (Uint64 '10000) $8 (AsRange '($9 $9))))\n" + "(let $11 (RangeFor '== $1 $3))\n" + "(let $12 (RangeFor '> $2 $3))\n" + "(return (RangeFinalize (RangeMultiply (Uint64 '10000) (RangeUnion (RangeIntersect $7 (RangeUnion $10 (RangeIntersect (RangeMultiply (Uint64 '10000) $11 $12))))))))\n" + ")\n" + ; + auto ranges = DumpNode(*buildResult.ComputeNode, exprCtx); + Cerr << ranges; + + auto canonicalLambda = + "(\n" + "(return (lambda '($1) (OptionalIf (Bool 'true) $1)))\n" + ")\n"; + auto lambda = DumpNode(*buildResult.PrunedLambda, exprCtx); + Cerr << lambda; + + UNIT_ASSERT_EQUAL(ranges, canonicalRanges); + UNIT_ASSERT_EQUAL(lambda, canonicalLambda); + + TString expectedExecuteResult = R"__({"Write"=[{"Data"=[[[["1"];["2"];"0"];[["1"];#;"1"]];[[["2"];#;"1"];[["2"];#;"1"]];[[["3"];#;"1"];[["3"];#;"1"]]]}]})__"; + ExecuteAndCheckRanges(ranges, expectedExecuteResult); + } + + Y_UNIT_TEST(MixedAnd) { + TString prog = GetNonOptionsSrc4() + + "insert into Output with truncate\n" + "select * from as_table($src) where (x in [1, 2]) and (x, y) in [(2, 3), (3, 4)] and (z, t) < (4, 5) and (y, z, t) > (0,0,0);"; + + TExprContext exprCtx; + TTypeAnnotationContextPtr typesCtx; + TExprNode::TPtr exprRoot = ParseAndOptimize(prog, exprCtx, typesCtx); + TExprNode::TPtr filterLambda = LocateFilterLambda(exprRoot); + + THashSet<TString> usedColumns; + using NDetail::TPredicateRangeExtractor; + + auto extractor = MakePredicateRangeExtractor({}); + UNIT_ASSERT(extractor->Prepare(filterLambda, *filterLambda->Head().Head().GetTypeAnn(), usedColumns, exprCtx, *typesCtx)); + + auto buildResult = extractor->BuildComputeNode({ "x", "y", "z", "t" }, exprCtx, *typesCtx); + UNIT_ASSERT(buildResult.ComputeNode); + + auto ranges = DumpNode(*buildResult.ComputeNode, exprCtx); + Cerr << ranges; + + auto canonicalLambda = + "(\n" + "(return (lambda '($1) (OptionalIf (Bool 'true) $1)))\n" + ")\n"; + auto lambda = DumpNode(*buildResult.PrunedLambda, exprCtx); + Cerr << lambda; + + TString expectedExecuteResult = R"__({"Write"=[{"Data"=[[[["2"];["3"];#;#;"1"];[["2"];["3"];["4"];["5"];"0"]]]}]})__"; + ExecuteAndCheckRanges(ranges, expectedExecuteResult); + + UNIT_ASSERT_EQUAL(lambda, canonicalLambda); + } } } // namespace NYql |