aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author zverevgeny <zverevgeny@ydb.tech> 2023-09-26 12:13:29 +0300
committer zverevgeny <zverevgeny@ydb.tech> 2023-09-26 13:26:30 +0300
commit c083da78d46e31819d8e04e6624a25409a5f99d0 (patch)
tree e7749fb55c86893490f6de24354bba7f5ff69803
parent 2b0c4b4dbf4284b4290d53d105aa598d1822ab49 (diff)
download ydb-c083da78d46e31819d8e04e6624a25409a5f99d0.tar.gz
YQL-16443 MATCH_RECOGNIZE ut setup + simple cases
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt 1
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt 1
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt 1
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt 1
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp 144
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/ya.make 1
6 files changed, 149 insertions, 0 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt
index c1a21d9c998..ae4ac0e1e24 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt
@@ -62,6 +62,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_map_join_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt
index 81ac3b2d11b..fe4986b50ad 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt
@@ -65,6 +65,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_map_join_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt
index f6d0d474c60..1b0c65d9998 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt
@@ -66,6 +66,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_map_join_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt
index 48a437b9234..a3c7a645371 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt
@@ -55,6 +55,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_map_join_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp
new file mode 100644
index 00000000000..f4e73123b5d
--- /dev/null
+++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp
@@ -0,0 +1,144 @@
+#include "../mkql_match_recognize_nfa.h"
+#include "mkql_computation_node_ut.h"
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h>
+#include <ydb/library/yql/core/sql_types/match_recognize.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <vector>
+#include <algorithm>
+
+namespace NKikimr::NMiniKQL::NMatchRecognize {
+namespace {
+
+// Test fixture that wires a TNfa to a minimal computation graph.
+//
+// A synthetic "TestNfa" callable is built with one argument typed as the
+// matched-vars struct, followed by one Bool argument per pattern variable.
+// The callable factory returned by GetAuxCallableFactory() records the nodes
+// located for those arguments in MatchedVars and Defines, so a test can drive
+// each define directly through Defines[i]->SetValue(...).
+//
+// Member declaration order is load-bearing: MatchedVars, Defines and VarCount
+// are declared before Graph, and Graph before Nfa, because InitNfa() reads what
+// the graph construction step filled in.
+struct TNfaSetup {
+    // explicit: a pattern must never silently convert into a whole fixture.
+    explicit TNfaSetup(const TRowPattern& pattern)
+        : Setup(GetAuxCallableFactory())
+        , Graph(InitComputationGraph(pattern))
+        , Nfa(InitNfa(pattern))
+    {}
+
+    // The factory handed to Setup captures `this`; a copied fixture would hold
+    // a factory that still points at the object it was copied from.
+    TNfaSetup(const TNfaSetup&) = delete;
+    TNfaSetup& operator=(const TNfaSetup&) = delete;
+
+    // Builds the graph around the "TestNfa" callable and stores the number of
+    // pattern variables in VarCount.
+    THolder<IComputationGraph> InitComputationGraph(const TRowPattern& pattern) {
+        auto& env = *Setup.Env;
+
+        // Element type of each per-variable list: Struct<From: ui64, To: ui64>.
+        TStructTypeBuilder indexRangeTypeBuilder(env);
+        indexRangeTypeBuilder.Add("From", TDataType::Create(NUdf::TDataType<ui64>::Id, env));
+        indexRangeTypeBuilder.Add("To", TDataType::Create(NUdf::TDataType<ui64>::Id, env));
+        const auto rangeList = TListType::Create(indexRangeTypeBuilder.Build(), env);
+
+        // One struct member per pattern variable, each a list of index ranges.
+        const auto& vars = GetPatternVars(pattern);
+        VarCount = vars.size();
+        TStructTypeBuilder matchedVarsTypeBuilder(env);
+        for (const auto& var: vars) {
+            matchedVarsTypeBuilder.Add(var, rangeList);
+        }
+
+        // Input 0 is the matched-vars argument; inputs 1..VarCount are the Bool
+        // arguments. The callable factory depends on exactly this layout.
+        auto& pgmBuilder = *Setup.PgmBuilder;
+        TCallableBuilder callableBuilder(env, "TestNfa", env.GetTypeOfVoid());
+        callableBuilder.Add(pgmBuilder.Arg(matchedVarsTypeBuilder.Build()));
+        for (size_t i = 0; i != VarCount; ++i) {
+            callableBuilder.Add(pgmBuilder.Arg(pgmBuilder.NewDataType(NUdf::EDataSlot::Bool)));
+        }
+        const auto testNfa = TRuntimeNode(callableBuilder.Build(), false);
+        return Setup.BuildGraph(testNfa);
+    }
+
+    // Creates the NFA for `pattern` from MatchedVars and Defines.
+    // NOTE(review): both are presumably populated while Setup.BuildGraph()
+    // runs the callable factory, which is why Graph is initialized first.
+    TNfa InitNfa(const TRowPattern& pattern) {
+        const auto& vars = GetPatternVars(pattern);
+        std::vector<TString> varVec(vars.cbegin(), vars.cend());
+        //Simulate implicit name ordering in YQL structs
+        std::sort(varVec.begin(), varVec.end());
+        THashMap<TString, size_t> varNameLookup;
+        for (size_t i = 0; i != varVec.size(); ++i) {
+            varNameLookup[varVec[i]] = i;
+        }
+        const auto& transitionGraph = TNfaTransitionGraph::Create(pattern, varNameLookup);
+        // TNfa is given a vector of plain computation-node pointers, so the
+        // recorded external-node pointers are converted element by element.
+        TComputationNodePtrVector defines(Defines.cbegin(), Defines.cend());
+        return TNfa(transitionGraph, MatchedVars, defines);
+    }
+
+    // Returns a callable factory that intercepts the "TestNfa" callable and
+    // forwards every other callable to the builtin factory.
+    TComputationNodeFactory GetAuxCallableFactory() {
+        return [this](TCallable& callable, const TComputationNodeFactoryContext& ctx) -> IComputationNode* {
+            if (callable.GetType()->GetName() == "TestNfa") {
+                // Input 0: the matched-vars argument.
+                MatchedVars = static_cast<IComputationExternalNode*>(
+                    LocateNode(ctx.NodeLocator, *callable.GetInput(0).GetNode()));
+                // Inputs 1..VarCount: one argument per pattern variable.
+                for (size_t i = 0; i != VarCount; ++i) {
+                    auto* defineArg = callable.GetInput(1 + i).GetNode();
+                    auto* located = LocateNode(ctx.NodeLocator, *defineArg);
+                    Defines.push_back(static_cast<IComputationExternalNode*>(located));
+                }
+                return new TExternalComputationNode(ctx.Mutables);
+            }
+            return GetBuiltinFactory()(callable, ctx);
+        };
+    }
+
+    // Calls Nfa.GetMatched() until it yields a false value and returns how
+    // many truthy results were seen.
+    size_t GetMatchedCount() {
+        size_t result = 0;
+        while (Nfa.GetMatched()) {
+            ++result;
+        }
+        return result;
+    }
+
+    // Computation context of the graph built for this fixture.
+    TComputationContext& Ctx() const {
+        return Graph->GetContext();
+    }
+
+    TSetup<false> Setup;
+    // Set by the callable factory; default-initialized so that a read before
+    // the graph is built is well-defined.
+    IComputationExternalNode* MatchedVars = nullptr;
+    TComputationExternalNodePtrVector Defines;
+    size_t VarCount = 0;
+    THolder<IComputationGraph> Graph;
+    TNfa Nfa;
+};
+
+} //namespace
+
+// Tests for TNfa with a pattern made of the single factor "A". Each test
+// sets the value of the define for A, feeds 100 rows one at a time, and
+// checks how many matches the NFA reports after every row.
+Y_UNIT_TEST_SUITE(MatchRecognizeNfa) {
+    TMemoryUsageInfo memUsage("MatchedVars");
+
+    // The define for A is always false: no row may produce a match.
+    Y_UNIT_TEST(SingleVarAcceptNothing) {
+        TScopedAlloc alloc(__LOCATION__);
+        THolderFactory holderFactory(alloc.Ref(), memUsage);
+        const TRowPattern pattern{{TRowPatternFactor{"A", 1 ,1, false, false}}};
+        TNfaSetup setup{pattern};
+        auto& defineA = setup.Defines[0];
+        auto& ctx = setup.Ctx();
+        defineA->SetValue(ctx, NUdf::TUnboxedValuePod{false});
+        TSparseList list;
+        for (size_t i = 0; i != 100; ++i) {
+            setup.Nfa.ProcessRow(list.Append(NUdf::TUnboxedValue{}), ctx);
+            UNIT_ASSERT_VALUES_EQUAL(0, setup.GetMatchedCount());
+        }
+    }
+
+    // The define for A is always true: every row produces exactly one match.
+    Y_UNIT_TEST(SingleVarAcceptEveryRow) {
+        TScopedAlloc alloc(__LOCATION__);
+        THolderFactory holderFactory(alloc.Ref(), memUsage);
+        const TRowPattern pattern{{TRowPatternFactor{"A", 1 ,1, false, false}}};
+        TNfaSetup setup{pattern};
+        auto& defineA = setup.Defines[0];
+        auto& ctx = setup.Ctx();
+        defineA->SetValue(ctx, NUdf::TUnboxedValuePod{true});
+        TSparseList list;
+        for (size_t i = 0; i != 100; ++i) {
+            setup.Nfa.ProcessRow(list.Append(NUdf::TUnboxedValue{}), ctx);
+            UNIT_ASSERT_VALUES_EQUAL(1, setup.GetMatchedCount());
+        }
+    }
+
+    // The define for A flips on every row: rows with an odd zero-based index
+    // match once, the others do not match.
+    Y_UNIT_TEST(SingleAlternatedVarAcceptEven) {
+        TScopedAlloc alloc(__LOCATION__);
+        THolderFactory holderFactory(alloc.Ref(), memUsage);
+        const TRowPattern pattern{{TRowPatternFactor{"A", 1 ,1, false, false}}};
+        TNfaSetup setup{pattern};
+        auto& defineA = setup.Defines[0];
+        auto& ctx = setup.Ctx();
+        TSparseList list;
+        for (size_t i = 0; i != 100; ++i) {
+            //change the value of the var
+            // The define argument is declared as Bool, so hand over a real
+            // bool instead of the size_t that `i % 2` evaluates to.
+            const bool accept = (i % 2 != 0);
+            defineA->SetValue(ctx, NUdf::TUnboxedValuePod{accept});
+            setup.Nfa.ProcessRow(list.Append(NUdf::TUnboxedValue{}), ctx);
+            UNIT_ASSERT_VALUES_EQUAL(i % 2, setup.GetMatchedCount());
+        }
+    }
+}
+
+} //namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/ya.make b/ydb/library/yql/minikql/comp_nodes/ut/ya.make
index 7d015819ace..5d5af0d667e 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/ya.make
+++ b/ydb/library/yql/minikql/comp_nodes/ut/ya.make
@@ -42,6 +42,7 @@ SRCS(
mkql_map_join_ut.cpp
mkql_match_recognize_matched_vars_ut.cpp
mkql_match_recognize_list_ut.cpp
+ mkql_match_recognize_nfa_ut.cpp
mkql_safe_circular_buffer_ut.cpp
mkql_sort_ut.cpp
mkql_switch_ut.cpp