diff options
author | avevad <avevad@yandex-team.com> | 2023-11-23 09:33:31 +0300 |
---|---|---|
committer | avevad <avevad@yandex-team.com> | 2023-11-23 09:55:22 +0300 |
commit | 0e69bf615395fdd48ecee032faaec81bc468b0b8 (patch) | |
tree | 786d4190f7afb246e2286e12c5fb2c68a996e35f | |
parent | be2dd2d37b3fbccbba3bcf6bc6fc2d9f535d8da5 (diff) | |
download | ydb-0e69bf615395fdd48ecee032faaec81bc468b0b8.tar.gz |
YQL-16834 Add optional parameter of MATCH_RECOGNIZE pattern to optimize matching of unused variables
18 files changed, 221 insertions, 114 deletions
diff --git a/ydb/library/yql/core/sql_types/match_recognize.h b/ydb/library/yql/core/sql_types/match_recognize.h index 766bd4874f..eeec975bf2 100644 --- a/ydb/library/yql/core/sql_types/match_recognize.h +++ b/ydb/library/yql/core/sql_types/match_recognize.h @@ -35,6 +35,7 @@ struct TRowPatternFactor { uint64_t QuantityMax; bool Greedy; bool Output; //include in output with ALL ROW PER MATCH + bool Unused; // optimization flag; is true when the variable is not used in defines and measures }; THashSet<TString> GetPatternVars(const TRowPattern&); diff --git a/ydb/library/yql/core/sql_types/match_recognize_ut.cpp b/ydb/library/yql/core/sql_types/match_recognize_ut.cpp index 1f1d681141..230b3bf9fc 100644 --- a/ydb/library/yql/core/sql_types/match_recognize_ut.cpp +++ b/ydb/library/yql/core/sql_types/match_recognize_ut.cpp @@ -5,8 +5,8 @@ using namespace NYql::NMatchRecognize; Y_UNIT_TEST_SUITE(MatchRecognizePattern){ - auto factorVar = [](const TString & v) { return TRowPatternFactor{v, 0, 0, false, false};}; - auto factorExpr = [](const TRowPattern& p) { return TRowPatternFactor{p, 0, 0, false, false};}; + auto factorVar = [](const TString& v) { return TRowPatternFactor{v, 0, 0, false, false, false};}; + auto factorExpr = [](const TRowPattern& p) { return TRowPatternFactor{p, 0, 0, false, false, false};}; auto expected = [](std::initializer_list<TString> list) { return THashSet<TString>(list); }; Y_UNIT_TEST(SingleVarPattern) { const TRowPattern pattern = {{factorVar("A")}}; diff --git a/ydb/library/yql/core/yql_match_recognize.h b/ydb/library/yql/core/yql_match_recognize.h index 3afefb06ba..9089b963b5 100644 --- a/ydb/library/yql/core/yql_match_recognize.h +++ b/ydb/library/yql/core/yql_match_recognize.h @@ -10,15 +10,16 @@ inline TRowPattern ConvertPattern(const TExprNode::TPtr& pattern, TExprContext & for (const auto& term: pattern->Children()) { result.push_back(TRowPatternTerm{}); for (const auto& factor: term->Children()) { - YQL_ENSURE(factor->ChildrenSize() == 5, "Expect 5 args"); + YQL_ENSURE(factor->ChildrenSize() == 6, "Expect 6 args"); result.back().push_back(TRowPatternFactor{ factor->ChildRef(0)->IsAtom() ? - TRowPatternPrimary(TString(factor->ChildRef(0)->Content())) : - ConvertPattern(factor->ChildRef(0), ctx, ++nestingLevel), + TRowPatternPrimary(TString(factor->ChildRef(0)->Content())) : + ConvertPattern(factor->ChildRef(0), ctx, ++nestingLevel), FromString<ui64>(factor->ChildRef(1)->Content()), FromString<ui64>(factor->ChildRef(2)->Content()), FromString<bool>(factor->ChildRef(3)->Content()), - FromString<bool>(factor->ChildRef(4)->Content()) + FromString<bool>(factor->ChildRef(4)->Content()), + FromString<bool>(factor->ChildRef(5)->Content()) }); } } diff --git a/ydb/library/yql/core/yql_opt_match_recognize.cpp b/ydb/library/yql/core/yql_opt_match_recognize.cpp index 356fd67c2c..5e99874b54 100644 --- a/ydb/library/yql/core/yql_opt_match_recognize.cpp +++ b/ydb/library/yql/core/yql_opt_match_recognize.cpp @@ -31,6 +31,79 @@ bool IsStreaming(const TExprNode::TPtr& input, const TTypeAnnotationContext& typ } } //namespace +// returns std::nullopt if all vars could be used +std::optional<TSet<TStringBuf>> FindUsedVars(const TExprNode::TPtr& params) { + TSet<TStringBuf> usedVars; + bool allVarsUsed = false; + + const auto createVisitor = [&usedVars, &allVarsUsed](const TExprNode::TPtr& varsArg) { + return [&varsArg, &usedVars, &allVarsUsed](const TExprNode::TPtr& node) -> bool { + if (node->IsCallable("Member")) { + if (node->ChildRef(0) == varsArg) { + usedVars.insert(node->ChildRef(1)->Content()); + return false; + } + } + if (node == varsArg) { + allVarsUsed = true; + } + return true; + }; + }; + + const auto& measures = params->ChildRef(0); + static constexpr size_t measureLambdasStartPos = 3; + for (size_t pos = measureLambdasStartPos; pos != measures->ChildrenSize(); pos++) { + const auto& lambda = measures->ChildRef(pos); + const auto& lambdaArgs = lambda->ChildRef(0); + const auto& lambdaBody = lambda->ChildRef(1); + const auto& varsArg = lambdaArgs->ChildRef(1); + NYql::VisitExpr(lambdaBody, createVisitor(varsArg)); + } + + const auto& defines = params->ChildRef(4); + static constexpr size_t defineLambdasStartPos = 3; + for (size_t pos = defineLambdasStartPos; pos != defines->ChildrenSize(); pos++) { + const auto& lambda = defines->ChildRef(pos); + const auto& lambdaArgs = lambda->ChildRef(0); + const auto& lambdaBody = lambda->ChildRef(1); + const auto& varsArg = lambdaArgs->ChildRef(1); + NYql::VisitExpr(lambdaBody, createVisitor(varsArg)); + } + + return allVarsUsed ? std::nullopt : std::make_optional(usedVars); +} + +// usedVars can be std::nullopt if all vars could probably be used +TExprNode::TPtr MarkUnusedPatternVars(const TExprNode::TPtr& node, TExprContext& ctx, const std::optional<TSet<TStringBuf>> &usedVars) { + const auto pos = node->Pos(); + if (node->ChildrenSize() != 0 && node->ChildRef(0)->IsAtom()) { + const auto& varName = node->ChildRef(0)->Content(); + bool varUsed = !usedVars.has_value() || usedVars.value().contains(varName); + return ctx.Builder(pos) + .List() + .Add(0, node->ChildRef(0)) + .Add(1, node->ChildRef(1)) + .Add(2, node->ChildRef(2)) + .Add(3, node->ChildRef(3)) + .Add(4, node->ChildRef(4)) + .Add(5, ctx.NewAtom(pos, varUsed ? "0" : "1")) + .Seal() + .Build(); + } + TExprNodeList newChildren; + for (size_t chPos = 0; chPos != node->ChildrenSize(); chPos++) { + newChildren.push_back(MarkUnusedPatternVars(node->ChildRef(chPos), ctx, usedVars)); + } + if (node->IsCallable()) { + return ctx.Builder(pos).Callable(node->Content()).Add(std::move(newChildren)).Seal().Build(); + } else if (node->IsList()) { + return ctx.Builder(pos).List().Add(std::move(newChildren)).Seal().Build(); + } else { // Atom + return node; + } +} + TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typeAnnCtx) { YQL_ENSURE(node->IsCallable({"MatchRecognize"})); const auto& input = node->ChildRef(0); @@ -55,7 +128,13 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext& .Seal() .Add(1, partitionKeySelector) .Add(2, partitionColumns) - .Add(3, params) + .Callable(3, params->Content()) + .Add(0, params->ChildRef(0)) + .Add(1, params->ChildRef(1)) + .Add(2, params->ChildRef(2)) + .Add(3, MarkUnusedPatternVars(params->ChildRef(3), ctx, FindUsedVars(params))) + .Add(4, params->ChildRef(4)) + .Seal() .Add(4, settings) .Seal() .Seal() diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index b4f519c238..2058d02575 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -484,7 +484,7 @@ TRowPattern ConvertPattern(const TRuntimeNode& pattern) { TVector<TRowPatternFactor> term; for (ui32 j = 0; j != inputTerm->GetValuesCount(); ++j) { const auto& inputFactor = AS_VALUE(TTupleLiteral, inputTerm->GetValue(j)); - MKQL_ENSURE(inputFactor->GetValuesCount() == 5, "Internal logic error"); + MKQL_ENSURE(inputFactor->GetValuesCount() == 6, "Internal logic error"); const auto& primary = inputFactor->GetValue(0); term.push_back(TRowPatternFactor{ primary.GetRuntimeType()->IsData() ? @@ -493,7 +493,8 @@ TRowPattern ConvertPattern(const TRuntimeNode& pattern) { AS_VALUE(TDataLiteral, inputFactor->GetValue(1))->AsValue().Get<ui64>(), AS_VALUE(TDataLiteral, inputFactor->GetValue(2))->AsValue().Get<ui64>(), AS_VALUE(TDataLiteral, inputFactor->GetValue(3))->AsValue().Get<bool>(), - AS_VALUE(TDataLiteral, inputFactor->GetValue(4))->AsValue().Get<bool>() + AS_VALUE(TDataLiteral, inputFactor->GetValue(4))->AsValue().Get<bool>(), + AS_VALUE(TDataLiteral, inputFactor->GetValue(5))->AsValue().Get<bool>() }); } result.push_back(std::move(term)); diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h index 540b9d005e..1ad86607a2 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_nfa.h @@ -15,7 +15,7 @@ struct TVoidTransition { }; using TEpsilonTransition = size_t; //to using TEpsilonTransitions = std::vector<TEpsilonTransition, TMKQLAllocator<TEpsilonTransition>>; -using TMatchedVarTransition = std::pair<ui32, size_t>; //{varIndex, to} +using TMatchedVarTransition = std::pair<std::pair<ui32, bool>, size_t>; //{{varIndex, saveState}, to} using TQuantityEnterTransition = size_t; //to using TQuantityExitTransition = std::pair<std::pair<ui64, ui64>, std::pair<size_t, size_t>>; //{{min, max}, {foFindMore, toMatched}} using TNfaTransition = std::variant< @@ -203,7 +203,7 @@ private: auto input = AddNode(); auto output = AddNode(); auto item = factor.Primary.index() == 0 ? - BuildVar(varNameToIndex.at(std::get<0>(factor.Primary))) : + BuildVar(varNameToIndex.at(std::get<0>(factor.Primary)), !factor.Unused) : BuildTerms(std::get<1>(factor.Primary), varNameToIndex); if (1 == factor.QuantityMin && 1 == factor.QuantityMax) { //simple linear case Graph->Transitions[input] = TEpsilonTransitions{item.Input}; @@ -223,12 +223,12 @@ private: } return {input, output}; } - TNfaItem BuildVar(ui32 varIndex) { + TNfaItem BuildVar(ui32 varIndex, bool isUsed) { auto input = AddNode(); auto matchVar = AddNode(); auto output = AddNode(); Graph->Transitions[input] = TEpsilonTransitions({matchVar}); - Graph->Transitions[matchVar] = std::pair{varIndex, output}; + Graph->Transitions[matchVar] = std::pair{std::pair{varIndex, isUsed}, output}; return {input, output}; } public: @@ -283,13 +283,17 @@ public: //all other transitions are handled in MakeEpsilonTransitions if (const auto* matchedVarTransition = std::get_if<TMatchedVarTransition>(&TransitionGraph->Transitions[s.Index])) { MatchedRangesArg->SetValue(ctx, ctx.HolderFactory.Create<TMatchedVarsValue<TRange>>(ctx.HolderFactory, s.Vars)); - const auto varIndex = matchedVarTransition->first; + const auto varIndex = matchedVarTransition->first.first; const auto& v = Defines[varIndex]->GetValue(ctx); if (v && v.Get<bool>()) { - auto vars = s.Vars; //TODO get rid of this copy - auto& matchedVar = vars[varIndex]; - Extend(matchedVar, currentRowLock); - newStates.emplace(matchedVarTransition->second, std::move(vars), std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>(s.Quantifiers)); + if (matchedVarTransition->first.second) { + auto vars = s.Vars; //TODO get rid of this copy + auto& matchedVar = vars[varIndex]; + Extend(matchedVar, currentRowLock); + newStates.emplace(matchedVarTransition->second, std::move(vars), std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>(s.Quantifiers)); + } else { + newStates.emplace(matchedVarTransition->second, s.Vars, std::stack<ui64, std::deque<ui64, TMKQLAllocator<ui64>>>(s.Quantifiers)); + } } deletedStates.insert(s); } diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp index 7d116d6563..16e6ced0bf 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp @@ -126,7 +126,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { Y_UNIT_TEST(OutputStateHasNoOutputEdges) { TScopedAlloc alloc(__LOCATION__); - const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false}}}; + const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false, false}}}; const auto transitionGraph = TNfaTransitionGraphBuilder::Create(pattern, {{"A", 0}}); const auto& output = transitionGraph->Transitions.at(transitionGraph->Output); UNIT_ASSERT(std::get_if<TVoidTransition>(&output)); @@ -135,25 +135,25 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { TScopedAlloc alloc(__LOCATION__); const TRowPattern pattern{ { - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"B", 1, 100, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"B", 1, 100, false, false, false}, TRowPatternFactor{ TRowPattern{ - {TRowPatternFactor{"C", 1, 1, false, false}}, - {TRowPatternFactor{"D", 1, 1, false, false}} + {TRowPatternFactor{"C", 1, 1, false, false, false}}, + {TRowPatternFactor{"D", 1, 1, false, false, false}} }, - 1, 1, false, false + 1, 1, false, false, false } }, { TRowPatternFactor{ TRowPattern{{ - TRowPatternFactor{"E", 1, 1, false, false}, - TRowPatternFactor{"F", 1, 100, false, false}, + TRowPatternFactor{"E", 1, 1, false, false, false}, + TRowPatternFactor{"F", 1, 100, false, false, false}, }}, - 2, 100, false, false + 2, 100, false, false, false }, - TRowPatternFactor{"G", 1, 1, false, false} + TRowPatternFactor{"G", 1, 1, false, false, false} } }; const auto graph = TNfaTransitionGraphBuilder::Create(pattern, TNfaSetup::BuildVarLookup(pattern)); @@ -172,8 +172,8 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { Y_UNIT_TEST(SingleEpsilonsEliminated) { TScopedAlloc alloc(__LOCATION__); const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"B", 1, 1, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"B", 1, 1, false, false, false}, }}; const auto graph = TNfaTransitionGraphBuilder::Create(pattern, TNfaSetup::BuildVarLookup(pattern)); for (size_t node = 0; node != graph->Transitions.size(); node++) { @@ -198,7 +198,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { Y_UNIT_TEST(SingleVarAcceptNothing) { TScopedAlloc alloc(__LOCATION__); THolderFactory holderFactory(alloc.Ref(), memUsage); - const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false}}}; + const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false, false}}}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); auto& ctx = setup.Ctx(); @@ -212,7 +212,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { Y_UNIT_TEST(SingleVarAcceptEveryRow) { TScopedAlloc alloc(__LOCATION__); THolderFactory holderFactory(alloc.Ref(), memUsage); - const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false}}}; + const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false, false}}}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); auto& ctx = setup.Ctx(); @@ -226,7 +226,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { Y_UNIT_TEST(SingleAlternatedVarAcceptEven) { TScopedAlloc alloc(__LOCATION__); THolderFactory holderFactory(alloc.Ref(), memUsage); - const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false}}}; + const TRowPattern pattern{{TRowPatternFactor{"A", 1, 1, false, false, false}}}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); auto& ctx = setup.Ctx(); @@ -242,7 +242,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { TScopedAlloc alloc(__LOCATION__); THolderFactory holderFactory(alloc.Ref(), memUsage); // "A{4, 6}" - const TRowPattern pattern{{TRowPatternFactor{"A", 4, 6, false, false}}}; + const TRowPattern pattern{{TRowPatternFactor{"A", 4, 6, false, false, false}}}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); auto& ctx = setup.Ctx(); @@ -274,9 +274,9 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); // "A A A" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"A", 1, 1, false, false} + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false} }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -295,8 +295,8 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"A B" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"B", 1, 1, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"B", 1, 1, false, false, false}, }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -316,8 +316,8 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"A | B" const TRowPattern pattern{ - {TRowPatternFactor{"A", 1, 1, false, false}}, - {TRowPatternFactor{"B", 1, 1, false, false}}, + {TRowPatternFactor{"A", 1, 1, false, false, false}}, + {TRowPatternFactor{"B", 1, 1, false, false, false}}, }; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -340,7 +340,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"Any*" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1000000000, false, false}, + TRowPatternFactor{"A", 1, 1000000000, false, false, false}, }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -359,6 +359,24 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { UNIT_ASSERT_VALUES_EQUAL(inputSize * (inputSize + 1) / 2, totalMatches); } + Y_UNIT_TEST(UnusedVarIgnored) { + TScopedAlloc alloc(__LOCATION__); + THolderFactory holderFactory(alloc.Ref(), memUsage); + const TRowPattern pattern{{ + TRowPatternFactor{"ANY", 1, 100, false, false, true}, + }}; + TNfaSetup setup{pattern}; + auto& defineA = setup.Defines.at(0); + auto& ctx = setup.Ctx(); + defineA->SetValue(ctx, NUdf::TUnboxedValuePod{true}); + TSparseList list; + const size_t inputSize = 100; + for (size_t i = 0; i != inputSize; ++i) { + setup.Nfa.ProcessRow(list.Append(NUdf::TUnboxedValue{}), ctx); + UNIT_ASSERT_EQUAL(0, setup.Nfa.GetMatched().value()[0].size()); + } + } + //Pattern: A* //Input: intermittent series events that match A Y_UNIT_TEST(AStar) { @@ -366,7 +384,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"A*" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1000000000, false, false}, + TRowPatternFactor{"A", 1, 1000000000, false, false, false}, }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -403,9 +421,9 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"A ANY* B" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"ANY", 1, 1000000000, false, false}, - TRowPatternFactor{"B", 1, 1, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"ANY", 1, 1000000000, false, false, false}, + TRowPatternFactor{"B", 1, 1, false, false, false}, }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -432,9 +450,9 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"A ANY* B" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"ANY", 1, 1000000000, false, false}, - TRowPatternFactor{"B", 1, 1, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"ANY", 1, 1000000000, false, false, false}, + TRowPatternFactor{"B", 1, 1, false, false, false}, }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); @@ -478,11 +496,11 @@ Y_UNIT_TEST_SUITE(MatchRecognizeNfa) { THolderFactory holderFactory(alloc.Ref(), memUsage); //"A ANY* B ANY* C" const TRowPattern pattern{{ - TRowPatternFactor{"A", 1, 1, false, false}, - TRowPatternFactor{"ANY", 1, 1000000000, false, false}, - TRowPatternFactor{"B", 1, 1, false, false}, - TRowPatternFactor{"ANY", 1, 1000000000, false, false}, - TRowPatternFactor{"C", 1, 1, false, false}, + TRowPatternFactor{"A", 1, 1, false, false, false}, + TRowPatternFactor{"ANY", 1, 1000000000, false, false, false}, + TRowPatternFactor{"B", 1, 1, false, false, false}, + TRowPatternFactor{"ANY", 1, 1000000000, false, false, false}, + TRowPatternFactor{"C", 1, 1, false, false, false}, }}; TNfaSetup setup{pattern}; auto& defineA = setup.Defines.at(0); diff --git a/ydb/library/yql/minikql/mkql_program_builder.cpp b/ydb/library/yql/minikql/mkql_program_builder.cpp index 28f8ae5c6f..367305c09f 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.cpp +++ b/ydb/library/yql/minikql/mkql_program_builder.cpp @@ -5826,6 +5826,7 @@ TRuntimeNode PatternToRuntimeNode(const TRowPattern& pattern, const TProgramBuil factorBuilder.Add(programBuilder.NewDataLiteral<ui64>(factor.QuantityMax)); factorBuilder.Add(programBuilder.NewDataLiteral<bool>(factor.Greedy)); factorBuilder.Add(programBuilder.NewDataLiteral<bool>(factor.Output)); + factorBuilder.Add(programBuilder.NewDataLiteral<bool>(factor.Unused)); termBuilder.Add({factorBuilder.Build(), true}); } patternBuilder.Add({termBuilder.Build(), true}); diff --git a/ydb/library/yql/sql/v1/match_recognize.cpp b/ydb/library/yql/sql/v1/match_recognize.cpp index 92fe9257d0..41e7cfbbc6 100644 --- a/ydb/library/yql/sql/v1/match_recognize.cpp +++ b/ydb/library/yql/sql/v1/match_recognize.cpp @@ -128,6 +128,7 @@ private: BuildQuotedAtom(pos, ToString(factor.QuantityMax)), BuildQuotedAtom(pos, ToString(factor.Greedy)), BuildQuotedAtom(pos, ToString(factor.Output)), + BuildQuotedAtom(pos, ToString(factor.Unused)) }); } diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp index 058d3d7c1c..17697b9082 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp @@ -310,7 +310,7 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer Y_ABORT("You should change implementation according to grammar changes"); } } - term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output}); + term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output, false}); } return term; } diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp index 30d715eeca..1c94a54593 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp @@ -405,7 +405,7 @@ FROM Input MATCH_RECOGNIZE( const auto& lastTerm = patternCallable->GetChild(2); UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3)); const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0); - UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 5)); + UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 6)); const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0); UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1); UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern"); @@ -461,7 +461,8 @@ FROM Input MATCH_RECOGNIZE( FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax FromString<bool>(factor->GetChild(3)->GetChild(1)->GetContent()), //Greedy - false //Output, not used in this test + false, //Output, not used in this test + false, // Flag "Unused", not used in this test }; }; { diff --git a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json index 767839350e..342e58c297 100644 --- a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json @@ -3032,9 +3032,9 @@ ], "test.test[match_recognize-test_type-streaming-default.txt-Debug]": [ { - "checksum": "c7c026731f4761c004df4dcf6a81bd97", - "size": 3598, - "uri": "https://{canondata_backend}/1903280/e4ff5eb809a5b4c4bf055c44b9764cdc1eed94de/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Debug_/opt.yql_patched" + "checksum": "fe12fc1a1791b4e40756a4b7b18cdf4d", + "size": 3809, + "uri": "https://{canondata_backend}/1814674/c01721727cf436116792c9757d000fdff090c9ef/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-test_type-streaming-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/dq_file/part6/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part6/canondata/result.json index f96e188e19..be6f210565 100644 --- a/ydb/library/yql/tests/sql/dq_file/part6/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part6/canondata/result.json @@ -3447,9 +3447,9 @@ ], "test.test[match_recognize-alerts-streaming-default.txt-Debug]": [ { - "checksum": "b90ce285ad954f51ef416e5121375235", - "size": 5577, - "uri": "https://{canondata_backend}/1784117/84e5539d9eb1fa06aed3012cd84be2264e2caeb7/resource.tar.gz#test.test_match_recognize-alerts-streaming-default.txt-Debug_/opt.yql_patched" + "checksum": "8b7be5f1f1a32456f1f4a7c32910723c", + "size": 5935, + "uri": "https://{canondata_backend}/1900335/d3caf6ae49c6f1143fbe0ff0e930c679766932e1/resource.tar.gz#test.test_match_recognize-alerts-streaming-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-alerts-streaming-default.txt-Plan]": [ @@ -3469,9 +3469,9 @@ ], "test.test[match_recognize-simple_paritioning-streaming-default.txt-Debug]": [ { - "checksum": "19794b1c1d34994f1f8d75967e14e60a", - "size": 3337, - "uri": "https://{canondata_backend}/1784117/84e5539d9eb1fa06aed3012cd84be2264e2caeb7/resource.tar.gz#test.test_match_recognize-simple_paritioning-streaming-default.txt-Debug_/opt.yql_patched" + "checksum": "95f7f6fecfb0bc130af790e5cbdb67ee", + "size": 3352, + "uri": "https://{canondata_backend}/1923547/22c2c509a5434879db9a69ad8b9605c384a0c07b/resource.tar.gz#test.test_match_recognize-simple_paritioning-streaming-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-simple_paritioning-streaming-default.txt-Plan]": [ @@ -3491,9 +3491,9 @@ ], "test.test[match_recognize-test_type-default.txt-Debug]": [ { - "checksum": "dafc5cc5a5159114d1db9cd6fd54da47", - "size": 2993, - "uri": "https://{canondata_backend}/1871182/71ebe565a1b9516b1fa184b75e577161b7c1a437/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql_patched" + "checksum": "413daca8efd9dda687246a807bf6d461", + "size": 3206, + "uri": "https://{canondata_backend}/1900335/d3caf6ae49c6f1143fbe0ff0e930c679766932e1/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-test_type-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/dq_file/part7/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part7/canondata/result.json index 64619a4bf9..1c427bb0bb 100644 --- a/ydb/library/yql/tests/sql/dq_file/part7/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part7/canondata/result.json @@ -3233,9 +3233,9 @@ ], "test.test[match_recognize-alerts-default.txt-Debug]": [ { - "checksum": "b4eb2e6e0f331d998902e8561d779563", - "size": 5246, - "uri": "https://{canondata_backend}/1781765/ba0fd27f0b93ac8fb3a3cab19fc0bd078d7d266a/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql_patched" + "checksum": "eb440da9e5a9e63e803c9f13e41929ed", + "size": 5588, + "uri": "https://{canondata_backend}/1925821/1eec33217b798df6e04e067717ff27656f697094/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-alerts-default.txt-Plan]": [ @@ -3255,9 +3255,9 @@ ], "test.test[match_recognize-simple_paritioning-default.txt-Debug]": [ { - "checksum": "dc786f53771dd23b67ddfdc27bc83a49", - "size": 3157, - "uri": "https://{canondata_backend}/1781765/ba0fd27f0b93ac8fb3a3cab19fc0bd078d7d266a/resource.tar.gz#test.test_match_recognize-simple_paritioning-default.txt-Debug_/opt.yql_patched" + "checksum": "4e2345b31a8edf56f9aa082b48be8eb1", + "size": 3172, + "uri": "https://{canondata_backend}/1600758/743fec0dc57746c777c8f1b3b8fd0c82a29a2914/resource.tar.gz#test.test_match_recognize-simple_paritioning-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-simple_paritioning-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index e8b5c64ccd..80beb3ccce 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -9563,51 +9563,51 @@ ], "test_sql2yql.test[match_recognize-alerts-streaming]": [ { - "checksum": "3be2f00c1d8899180aa3dea7670eacd4", - "size": 8741, - "uri": "https://{canondata_backend}/1942278/e0bc23d599a7fdda195ee253dc30609d74968230/resource.tar.gz#test_sql2yql.test_match_recognize-alerts-streaming_/sql.yql" + "checksum": "c14f925c06e489c925a41540ae063447", + "size": 8876, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-alerts-streaming_/sql.yql" } ], "test_sql2yql.test[match_recognize-alerts]": [ { - "checksum": "d51a6625653fd8f1d4d1443daf1ea043", - "size": 8743, - "uri": "https://{canondata_backend}/1942278/e0bc23d599a7fdda195ee253dc30609d74968230/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_/sql.yql" + "checksum": "02363f0191bd5cb00f1481bf5b4818c0", + "size": 8878, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_/sql.yql" } ], "test_sql2yql.test[match_recognize-simple_paritioning-streaming]": [ { - "checksum": "c45fd9828693a5900e2c841ed524781f", - "size": 4976, - "uri": "https://{canondata_backend}/1942278/e0bc23d599a7fdda195ee253dc30609d74968230/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning-streaming_/sql.yql" + "checksum": "6eaebd92e04aa13e993cbbe0f824e34b", + "size": 5021, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning-streaming_/sql.yql" } ], "test_sql2yql.test[match_recognize-simple_paritioning]": [ { - "checksum": "55eb0729d523fa8c202d7f3c4ba0edb8", - "size": 4945, - "uri": "https://{canondata_backend}/1942278/e0bc23d599a7fdda195ee253dc30609d74968230/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning_/sql.yql" + "checksum": "e8c9158351d5e43a5f83028e1ed0c535", + "size": 4990, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning_/sql.yql" } ], "test_sql2yql.test[match_recognize-test_type-streaming]": [ { - "checksum": "ba36579721990422880a8ec84a43101a", - "size": 10064, - "uri": "https://{canondata_backend}/1775059/4b9f229dddaae6a829602db98c6b8cd8a6452685/resource.tar.gz#test_sql2yql.test_match_recognize-test_type-streaming_/sql.yql" + "checksum": "cce28aca3d6a3e1799d06f1085816764", + "size": 10139, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-test_type-streaming_/sql.yql" } ], "test_sql2yql.test[match_recognize-test_type]": [ { - "checksum": "0f3c31b4d5d120603b925ae242171e7c", - "size": 9474, - "uri": "https://{canondata_backend}/1775059/4b9f229dddaae6a829602db98c6b8cd8a6452685/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_/sql.yql" + "checksum": "b2b6731a78df7ac091d8513f85d3435e", + "size": 9549, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_/sql.yql" } ], "test_sql2yql.test[match_recognize-test_type_predicate]": [ { - "checksum": "da4eb86916a5035e217cc756a5972b46", - "size": 3232, - "uri": "https://{canondata_backend}/1817427/e86cdcb0a013ce49ca9d3d5e9a91e98036212d9d/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_predicate_/sql.yql" + "checksum": "a0b22d5906eedef81e2f2eac6247b09b", + "size": 3247, + "uri": "https://{canondata_backend}/1946324/07f86b02802add9cba590f938304abe892044623/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_predicate_/sql.yql" } ], "test_sql2yql.test[optimizers-aggregate_over_aggregate]": [ diff --git a/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json index 0722e6ad71..e37384f760 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json @@ -2660,9 +2660,9 @@ ], "test.test[match_recognize-test_type-streaming-default.txt-Debug]": [ { - "checksum": "f84cf9afc5be7c271aa813ed55e57af9", - "size": 3526, - "uri": "https://{canondata_backend}/1777230/0d9e1c2588cec48a60243a7c4faadbcda4dadc09/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Debug_/opt.yql" + "checksum": "d50a7a97b56d9beba48fa158a5c56f63", + "size": 3737, + "uri": "https://{canondata_backend}/1880306/b6bed19f17e61b27c0a1fde8695f88bca72f8193/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-test_type-streaming-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/yt_native_file/part6/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part6/canondata/result.json index 32d4c0e41c..04a7013495 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part6/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part6/canondata/result.json @@ -2736,9 +2736,9 @@ ], "test.test[match_recognize-alerts-streaming-default.txt-Debug]": [ { - "checksum": "addb819f78b95c15ca2f07da966f806b", - "size": 5497, - "uri": "https://{canondata_backend}/1942278/62c45e31d5fff8d0d7beffe426b33aec87242b5e/resource.tar.gz#test.test_match_recognize-alerts-streaming-default.txt-Debug_/opt.yql" + "checksum": "265537794494c10822c0ae3caf2f72af", + "size": 5855, + "uri": "https://{canondata_backend}/212715/e697796a41bf025b25a9ce3feee2344c5dcbd7d9/resource.tar.gz#test.test_match_recognize-alerts-streaming-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-alerts-streaming-default.txt-Plan]": [ @@ -2757,9 +2757,9 @@ ], "test.test[match_recognize-simple_paritioning-streaming-default.txt-Debug]": [ { - "checksum": "6c887c37b904e2582f1ff2eb5e36ca5d", - "size": 3272, - "uri": "https://{canondata_backend}/1942278/62c45e31d5fff8d0d7beffe426b33aec87242b5e/resource.tar.gz#test.test_match_recognize-simple_paritioning-streaming-default.txt-Debug_/opt.yql" + "checksum": "ab5647ac59852b23ab62828c6bf5e23c", + "size": 3287, + "uri": "https://{canondata_backend}/1942173/2d950a73c887747ac5d5d11958531290098e5395/resource.tar.gz#test.test_match_recognize-simple_paritioning-streaming-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-simple_paritioning-streaming-default.txt-Plan]": [ @@ -2778,9 +2778,9 @@ ], "test.test[match_recognize-test_type-default.txt-Debug]": [ { - "checksum": "853611ed0f2bc089c518e64eda058973", - "size": 2925, - "uri": "https://{canondata_backend}/1937367/d3efc76b94aa4ceb09138ae54ddae7e6baf26ec0/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql" + "checksum": "f237ba53d4395d3769288c46d9619abd", + "size": 3138, + "uri": "https://{canondata_backend}/212715/e697796a41bf025b25a9ce3feee2344c5dcbd7d9/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-test_type-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/result.json index e91603bf61..b8cfc4038a 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/result.json @@ -2614,9 +2614,9 @@ ], "test.test[match_recognize-alerts-default.txt-Debug]": [ { - "checksum": "be9f6a11644a21ff2405035e4d3f2757", - "size": 5173, - "uri": "https://{canondata_backend}/1946324/ba41d9d40ac153e4344fd3b0eb660265fe450592/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql" + "checksum": "cdacfd53d916c957793e245a99e437fe", + "size": 5508, + "uri": "https://{canondata_backend}/1781765/96a9e2f1ee513087746dfc123e242312c31ebc52/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-alerts-default.txt-Plan]": [ @@ -2635,9 +2635,9 @@ ], "test.test[match_recognize-simple_paritioning-default.txt-Debug]": [ { - "checksum": "add9b542b1c21948dd2b1f443990ebbe", - "size": 3092, - "uri": "https://{canondata_backend}/1946324/ba41d9d40ac153e4344fd3b0eb660265fe450592/resource.tar.gz#test.test_match_recognize-simple_paritioning-default.txt-Debug_/opt.yql" + "checksum": "20ea23851178e349eb7bc5c1e5de063b", + "size": 3107, + "uri": "https://{canondata_backend}/1775059/3be4e11bd7199862ba1a99533633a2722b6067b6/resource.tar.gz#test.test_match_recognize-simple_paritioning-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-simple_paritioning-default.txt-Plan]": [ |