aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzverevgeny <zverevgeny@ydb.tech>2023-09-01 16:42:24 +0300
committerzverevgeny <zverevgeny@ydb.tech>2023-09-01 17:13:20 +0300
commitcb9faf27a068c2b006c6711252fdebc20a9377dd (patch)
tree8ccab2c9a118f9e854da7c10c89f77454e189d4d
parent09a74b181d913ec129c5af5d071fca27b5ac62e6 (diff)
downloadydb-cb9faf27a068c2b006c6711252fdebc20a9377dd.tar.gz
YQL-16325 MATCH_RECOGNIZE, pass defines & pattern to computation
-rw-r--r--ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt2
-rw-r--r--ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt2
-rw-r--r--ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt2
-rw-r--r--ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt2
-rw-r--r--ydb/library/yql/core/sql_types/match_recognize.cpp23
-rw-r--r--ydb/library/yql/core/sql_types/match_recognize.h21
-rw-r--r--ydb/library/yql/core/sql_types/match_recognize_ut.cpp39
-rw-r--r--ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt65
-rw-r--r--ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt70
-rw-r--r--ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt72
-rw-r--r--ydb/library/yql/core/sql_types/ut/CMakeLists.txt17
-rw-r--r--ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt60
-rw-r--r--ydb/library/yql/core/sql_types/ut/ya.make12
-rw-r--r--ydb/library/yql/core/sql_types/ya.make5
-rw-r--r--ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp21
-rw-r--r--ydb/library/yql/core/yql_match_recognize.h28
-rw-r--r--ydb/library/yql/core/yql_opt_match_recognize.cpp1
-rw-r--r--ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp134
-rw-r--r--ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h1
-rw-r--r--ydb/library/yql/minikql/mkql_program_builder.cpp74
-rw-r--r--ydb/library/yql/minikql/mkql_program_builder.h4
-rw-r--r--ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp23
-rw-r--r--ydb/library/yql/sql/v1/match_recognize.cpp12
-rw-r--r--ydb/library/yql/sql/v1/match_recognize.h27
-rw-r--r--ydb/library/yql/sql/v1/sql_match_recognize.cpp36
-rw-r--r--ydb/library/yql/sql/v1/sql_match_recognize.h6
-rw-r--r--ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp3
-rw-r--r--ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/library/yql/sql/v1/ut/ya.make1
32 files changed, 652 insertions, 115 deletions
diff --git a/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt
index a715d1f3c1f..5d0fdac0e62 100644
--- a/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(ut)
get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
@@ -20,6 +21,7 @@ target_link_libraries(yql-core-sql_types PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(yql-core-sql_types PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp
)
generate_enum_serilization(yql-core-sql_types
diff --git a/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt
index 54472b4296f..f709108d6b2 100644
--- a/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(ut)
get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
@@ -21,6 +22,7 @@ target_link_libraries(yql-core-sql_types PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(yql-core-sql_types PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp
)
generate_enum_serilization(yql-core-sql_types
diff --git a/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt
index 54472b4296f..f709108d6b2 100644
--- a/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(ut)
get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
@@ -21,6 +22,7 @@ target_link_libraries(yql-core-sql_types PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(yql-core-sql_types PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp
)
generate_enum_serilization(yql-core-sql_types
diff --git a/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt
index a715d1f3c1f..5d0fdac0e62 100644
--- a/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt
@@ -6,6 +6,7 @@
# original buildsystem will not be accepted.
+add_subdirectory(ut)
get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
@@ -20,6 +21,7 @@ target_link_libraries(yql-core-sql_types PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(yql-core-sql_types PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp
)
generate_enum_serilization(yql-core-sql_types
diff --git a/ydb/library/yql/core/sql_types/match_recognize.cpp b/ydb/library/yql/core/sql_types/match_recognize.cpp
new file mode 100644
index 00000000000..5eee7448269
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/match_recognize.cpp
@@ -0,0 +1,23 @@
+#include "match_recognize.h"
+#include <util/generic/string.h>
+#include <util/generic/hash_set.h>
+
+namespace NYql::NMatchRecognize {
+
+THashSet<TString> GetPatternVars(const TRowPattern& pattern) {
+ THashSet<TString> result;
+ for (const auto& term: pattern) {
+ for (const auto& factor: term) {
+ const auto& primary = factor.Primary;
+ if (primary.index() == 0){
+ result.insert(std::get<0>(primary));
+ } else {
+ const auto& vars = GetPatternVars(std::get<1>(primary));
+ result.insert(vars.begin(), vars.end());
+ }
+ }
+ }
+ return result;
+}
+
+}//namespace NYql::NMatchRecognize
diff --git a/ydb/library/yql/core/sql_types/match_recognize.h b/ydb/library/yql/core/sql_types/match_recognize.h
index fc2d6881ff3..dc02eb0302d 100644
--- a/ydb/library/yql/core/sql_types/match_recognize.h
+++ b/ydb/library/yql/core/sql_types/match_recognize.h
@@ -1,6 +1,9 @@
#pragma once
#include <util/generic/string.h>
#include <util/string/cast.h>
+#include <util/generic/vector.h>
+#include <util/generic/hash_set.h>
+#include <variant>
#include <stddef.h>
namespace NYql::NMatchRecognize {
@@ -18,4 +21,22 @@ inline TString MeasureInputDataSpecialColumnName(MeasureInputDataSpecialColumns
return TString("_yql_") + ToString(c);
}
+struct TRowPatternFactor;
+
+using TRowPatternTerm = TVector<TRowPatternFactor>;
+
+using TRowPattern = TVector<TRowPatternTerm>;
+
+using TRowPatternPrimary = std::variant<TString, TRowPattern>;
+
+struct TRowPatternFactor {
+ TRowPatternPrimary Primary;
+ uint64_t QuantityMin;
+ uint64_t QuantityMax;
+ bool Greedy;
+ bool Output; //include in output with ALL ROW PER MATCH
+};
+
+THashSet<TString> GetPatternVars(const TRowPattern&);
+
}//namespace NYql::NMatchRecognize
diff --git a/ydb/library/yql/core/sql_types/match_recognize_ut.cpp b/ydb/library/yql/core/sql_types/match_recognize_ut.cpp
new file mode 100644
index 00000000000..1f1d6811417
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/match_recognize_ut.cpp
@@ -0,0 +1,39 @@
+#include "match_recognize.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYql::NMatchRecognize;
+
+Y_UNIT_TEST_SUITE(MatchRecognizePattern){
+ auto factorVar = [](const TString & v) { return TRowPatternFactor{v, 0, 0, false, false};};
+ auto factorExpr = [](const TRowPattern& p) { return TRowPatternFactor{p, 0, 0, false, false};};
+ auto expected = [](std::initializer_list<TString> list) { return THashSet<TString>(list); };
+ Y_UNIT_TEST(SingleVarPattern) {
+ const TRowPattern pattern = {{factorVar("A")}};
+ UNIT_ASSERT_VALUES_EQUAL(expected({"A"}), GetPatternVars(pattern));
+ }
+ Y_UNIT_TEST(DistinctVarsPattern) {
+ const TRowPattern pattern = {{factorVar("A"), factorVar("B"), factorVar("C")}};
+ UNIT_ASSERT_VALUES_EQUAL(expected({"A", "B", "C"}), GetPatternVars(pattern));
+ }
+ Y_UNIT_TEST(RepeatedVarsPattern) {
+ const TRowPattern pattern = {{factorVar("A"), factorVar("B"), factorVar("B"), factorVar("C")}};
+ UNIT_ASSERT_VALUES_EQUAL(expected({"A", "B", "C"}), GetPatternVars(pattern));
+ }
+ Y_UNIT_TEST(NestedPattern) {
+ const TRowPattern pattern = {
+ {factorVar("A"), factorVar("B"), factorVar("B"), factorVar("C")},
+ {factorVar("B"), factorVar("C"), factorVar("D"), factorExpr(
+ {{factorExpr({
+ {factorVar("C"), factorVar("D"), factorVar("E")},
+ {factorVar("F")}
+ })}}
+ )
+ },
+ {factorVar("C")},
+ {factorVar("D")},
+ {factorVar("E")}
+ };
+ UNIT_ASSERT_VALUES_EQUAL(expected({"A", "B", "C", "D", "E", "F"}), GetPatternVars(pattern));
+ }
+}
diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 00000000000..f48e3fb5186
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,65 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-core-sql_types-ut)
+target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types
+)
+target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ yql-core-sql_types
+)
+target_link_options(ydb-library-yql-core-sql_types-ut PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+)
+target_sources(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-core-sql_types-ut
+ TEST_TARGET
+ ydb-library-yql-core-sql_types-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ LABELS
+ SMALL
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-core-sql_types-ut
+ system_allocator
+)
+vcs_info(ydb-library-yql-core-sql_types-ut)
diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..4e43cbcc6d3
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,70 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-core-sql_types-ut)
+target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types
+)
+target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-testing-unittest_main
+ yql-core-sql_types
+)
+target_link_options(ydb-library-yql-core-sql_types-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-core-sql_types-ut
+ TEST_TARGET
+ ydb-library-yql-core-sql_types-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ LABELS
+ SMALL
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-core-sql_types-ut
+ cpp-malloc-jemalloc
+)
+vcs_info(ydb-library-yql-core-sql_types-ut)
diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt
new file mode 100644
index 00000000000..c360cf20127
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,72 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-core-sql_types-ut)
+target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types
+)
+target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ yql-core-sql_types
+)
+target_link_options(ydb-library-yql-core-sql_types-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-core-sql_types-ut
+ TEST_TARGET
+ ydb-library-yql-core-sql_types-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ LABELS
+ SMALL
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-core-sql_types-ut
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(ydb-library-yql-core-sql_types-ut)
diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.txt
new file mode 100644
index 00000000000..f8b31df0c11
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt
new file mode 100644
index 00000000000..6e38dd6b7b6
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,60 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-library-yql-core-sql_types-ut)
+target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types
+)
+target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ yql-core-sql_types
+)
+target_sources(ydb-library-yql-core-sql_types-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 1
+)
+add_yunittest(
+ NAME
+ ydb-library-yql-core-sql_types-ut
+ TEST_TARGET
+ ydb-library-yql-core-sql_types-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ LABELS
+ SMALL
+)
+set_yunittest_property(
+ TEST
+ ydb-library-yql-core-sql_types-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+target_allocator(ydb-library-yql-core-sql_types-ut
+ system_allocator
+)
+vcs_info(ydb-library-yql-core-sql_types-ut)
diff --git a/ydb/library/yql/core/sql_types/ut/ya.make b/ydb/library/yql/core/sql_types/ut/ya.make
new file mode 100644
index 00000000000..f6f5ddd20d1
--- /dev/null
+++ b/ydb/library/yql/core/sql_types/ut/ya.make
@@ -0,0 +1,12 @@
+UNITTEST_FOR(ydb/library/yql/core/sql_types)
+SRCS(
+ match_recognize_ut.cpp
+)
+
+PEERDIR(
+ ydb/library/yql/core/sql_types
+)
+
+SIZE(SMALL)
+
+END()
diff --git a/ydb/library/yql/core/sql_types/ya.make b/ydb/library/yql/core/sql_types/ya.make
index 2ccf3b837d8..ad018c89957 100644
--- a/ydb/library/yql/core/sql_types/ya.make
+++ b/ydb/library/yql/core/sql_types/ya.make
@@ -2,9 +2,14 @@ LIBRARY()
SRCS(
match_recognize.h
+ match_recognize.cpp
simple_types.h
simple_types.cpp
)
GENERATE_ENUM_SERIALIZATION(match_recognize.h)
END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp b/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp
index 2f985dec3a5..081d2209cae 100644
--- a/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp
@@ -1,5 +1,6 @@
#include "type_ann_match_recognize.h"
#include <ydb/library/yql/core/sql_types/match_recognize.h>
+#include <ydb/library/yql/core/yql_match_recognize.h>
namespace NYql::NTypeAnnImpl {
@@ -54,26 +55,8 @@ MatchRecognizeParamsWrapper(const TExprNode::TPtr &input, TExprNode::TPtr &outpu
namespace {
-
-const std::unordered_set<TString> GetPrimaryVars(const TExprNode::TPtr& pattern, TContext &ctx, size_t nestingLevel) {
- std::unordered_set<TString> result;
- for (const auto& term: pattern->Children()) {
- for (const auto& factor: term->Children()) {
- YQL_ENSURE(EnsureArgsCount(*factor, 5, ctx.Expr), "Expect 5 args");
- if (factor->ChildRef(0)->IsAtom()) {
- result.insert(TString(factor->ChildRef(0)->Content()));
- } else {
- YQL_ENSURE(nestingLevel < NYql::NMatchRecognize::MaxPatternNesting, "To big nesting level in the pattern");
- auto subExprVars = GetPrimaryVars(factor->ChildRef(0), ctx, ++nestingLevel);
- result.insert(subExprVars.begin(), subExprVars.end());
- }
- }
- }
- return result;
-}
-
const TStructExprType* GetMatchedRowsRangesType(const TExprNode::TPtr& pattern, TContext &ctx) {
- auto vars = GetPrimaryVars(pattern, ctx, 0);
+ auto vars = GetPatternVars(NYql::NMatchRecognize::ConvertPattern(pattern, ctx.Expr, 0));
TVector<const TItemExprType*> items;
for (const auto& var: vars) {
const auto& item = ctx.Expr.MakeType<TStructExprType>(TVector<const TItemExprType*>{
diff --git a/ydb/library/yql/core/yql_match_recognize.h b/ydb/library/yql/core/yql_match_recognize.h
new file mode 100644
index 00000000000..3afefb06ba7
--- /dev/null
+++ b/ydb/library/yql/core/yql_match_recognize.h
@@ -0,0 +1,28 @@
+#pragma once
+#include <ydb/library/yql/core/sql_types/match_recognize.h>
+#include "ydb/library/yql/ast/yql_expr.h"
+
+namespace NYql::NMatchRecognize {
+
+inline TRowPattern ConvertPattern(const TExprNode::TPtr& pattern, TExprContext &ctx, size_t nestingLevel = 0) {
+ YQL_ENSURE(nestingLevel <= MaxPatternNesting, "To big nesting level in the pattern");
+ TRowPattern result;
+ for (const auto& term: pattern->Children()) {
+ result.push_back(TRowPatternTerm{});
+ for (const auto& factor: term->Children()) {
+ YQL_ENSURE(factor->ChildrenSize() == 5, "Expect 5 args");
+ result.back().push_back(TRowPatternFactor{
+ factor->ChildRef(0)->IsAtom() ?
+ TRowPatternPrimary(TString(factor->ChildRef(0)->Content())) :
+ ConvertPattern(factor->ChildRef(0), ctx, ++nestingLevel),
+ FromString<ui64>(factor->ChildRef(1)->Content()),
+ FromString<ui64>(factor->ChildRef(2)->Content()),
+ FromString<bool>(factor->ChildRef(3)->Content()),
+ FromString<bool>(factor->ChildRef(4)->Content())
+ });
+ }
+ }
+ return result;
+}
+
+} //namespace NYql::NMatchRecognize
diff --git a/ydb/library/yql/core/yql_opt_match_recognize.cpp b/ydb/library/yql/core/yql_opt_match_recognize.cpp
index f7cc70ca3d8..11d5cb08c94 100644
--- a/ydb/library/yql/core/yql_opt_match_recognize.cpp
+++ b/ydb/library/yql/core/yql_opt_match_recognize.cpp
@@ -71,5 +71,4 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext&
return result;
}
-
} //namespace NYql
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp
index dd42271bdd8..74504eb06f7 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp
@@ -8,20 +8,20 @@
namespace NKikimr::NMiniKQL {
-namespace {
+namespace NMatchRecognize {
enum class EMeasureColumnSource {Classifier = 0, MatchNumber = 1, Input};
-using TMeasureInputColumnOrder = std::vector<std::pair<EMeasureColumnSource, size_t>>;
+using TMeasureInputColumnOrder = TVector<std::pair<EMeasureColumnSource, size_t>>;
enum class EOutputColumnSource {PartitionKey, Measure};
-using TOutputColumnOrder = std::vector<std::pair<EOutputColumnSource, size_t>>;
+using TOutputColumnOrder = TVector<std::pair<EOutputColumnSource, size_t>>;
-using namespace NMatchRecognize;
+using namespace NYql::NMatchRecognize;
//Process one partition of input data
struct IProcessMatchRecognize {
///return true if it has output data ready
- virtual bool ProcessInputRow(NUdf::TUnboxedValue&& row) = 0;
+ virtual bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) = 0;
virtual NUdf::TUnboxedValue GetOutputIfReady(TComputationContext& ctx) = 0;
virtual bool ProcessEndOfData() = 0;
virtual ~IProcessMatchRecognize(){}
@@ -32,34 +32,42 @@ public:
TStreamMatchRecognize(
NUdf::TUnboxedValue&& partitionKey,
IComputationExternalNode* matchedVarsArg,
- std::vector<IComputationNode*>& measures,
+ TVector<IComputationNode*>& measures,
const TOutputColumnOrder& outputColumnOrder,
+ IComputationExternalNode* currentRowIndexArg,
+ const TVector<IComputationNode*>& defines,
const TContainerCacheOnContext& cache
)
: PartitionKey(std::move(partitionKey))
, MatchedVarsArg(matchedVarsArg)
, Measures(measures)
, OutputColumnOrder(outputColumnOrder)
+ , CurrentRowIndexArg(currentRowIndexArg)
+ , Defines(defines)
, Cache(cache)
- , MatchedVars(2) //Assume pattern (A B B)*, where A matches every 3rd row (i%3 == 0) and B matches the rest
+ , MatchedVars(Defines.size())
, HasMatch(false)
, RowCount(0)
{
}
- bool ProcessInputRow(NUdf::TUnboxedValue&& row) override{
+ bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) override{
Y_UNUSED(row);
- //Assume pattern (A B B)*, where A matches every 3rd row (i%3 == 0) and B matches the rest
- switch (RowCount % 3) {
- case 0:
- MatchedVars[0].push_back({RowCount, RowCount});
- break;
- case 1:
- MatchedVars[1].push_back({RowCount, RowCount});
- break;
- case 2:
- MatchedVars[1].back().second++;
- break;
+ CurrentRowIndexArg->SetValue(ctx, NUdf::TUnboxedValuePod(RowCount));
+ for (size_t i = 0; i != Defines.size(); ++i) {
+ const auto& d = Defines[i]->GetValue(ctx);
+ if (d && d.GetOptionalValue().Get<bool>()) {
+ auto& var = MatchedVars[i];
+ if (var.empty()) {
+ var.emplace_back(RowCount, RowCount);
+ }
+ else if (var.back().second + 1 == RowCount) {
+ ++var.back().second;
+ }
+ else {
+ var.emplace_back(RowCount, RowCount);
+ }
+ }
}
++RowCount;
return HasMatch;
@@ -86,14 +94,17 @@ public:
return result;
}
bool ProcessEndOfData() override {
+ //Assume match at the the end of each partition
HasMatch = true;
return HasMatch;
}
private:
const NUdf::TUnboxedValue PartitionKey;
IComputationExternalNode* const MatchedVarsArg;
- const std::vector<IComputationNode*>& Measures;
+ const TVector<IComputationNode*>& Measures;
const TOutputColumnOrder& OutputColumnOrder;
+ IComputationExternalNode* const CurrentRowIndexArg;
+ const TVector<IComputationNode*>& Defines;
const TContainerCacheOnContext& Cache;
TMatchedVars MatchedVars;
bool HasMatch;
@@ -109,8 +120,10 @@ public:
IComputationNode *partitionKey,
TType* partitionKeyType,
IComputationExternalNode* matchedVarsArg,
- std::vector<IComputationNode*>&& measures,
- TOutputColumnOrder&& outputColumnOrder
+ TVector<IComputationNode*>&& measures,
+ TOutputColumnOrder&& outputColumnOrder,
+ IComputationExternalNode* currentRowIndexArg,
+ const TVector<IComputationNode*>& defines
)
:TBaseComputation(mutables, inputFlow, kind, EValueRepresentation::Embedded)
, InputFlow(inputFlow)
@@ -120,6 +133,8 @@ public:
, MatchedVarsArg(matchedVarsArg)
, Measures(measures)
, OutputColumnOrder(outputColumnOrder)
+ , CurrentRowIndexArg(currentRowIndexArg)
+ , Defines(defines)
, Cache(mutables)
{}
@@ -132,6 +147,8 @@ public:
MatchedVarsArg,
Measures,
OutputColumnOrder,
+ CurrentRowIndexArg,
+ Defines,
Cache
);
}
@@ -161,8 +178,10 @@ private:
IComputationNode* partitionKey,
TType* partitionKeyType,
IComputationExternalNode* matchedVarsArg,
- const std::vector<IComputationNode*>& measures,
+ const TVector<IComputationNode*>& measures,
const TOutputColumnOrder& outputColumnOrder,
+ IComputationExternalNode* currentRowIndexArg,
+ const TVector<IComputationNode*>& defines,
const TContainerCacheOnContext& cache
)
: TComputationValue<TState>(memInfo)
@@ -172,13 +191,15 @@ private:
, MatchedVarsArg(matchedVarsArg)
, Measures(measures)
, OutputColumnOrder(outputColumnOrder)
+ , CurrentRowIndexArg(currentRowIndexArg)
+ , Defines(defines)
, Cache(cache)
{
}
void ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) {
auto partition = GetPartitionHandler(row, ctx);
- if (partition->second->ProcessInputRow(std::move(row))) {
+ if (partition->second->ProcessInputRow(std::move(row), ctx)) {
HasReadyOutput.push(partition);
}
}
@@ -220,6 +241,8 @@ private:
MatchedVarsArg,
Measures,
OutputColumnOrder,
+ CurrentRowIndexArg,
+ Defines,
Cache
));
}
@@ -238,8 +261,10 @@ private:
//to be passed to partitions
IComputationExternalNode* const MatchedVarsArg;
- std::vector<IComputationNode*> Measures;
+ TVector<IComputationNode*> Measures;
const TOutputColumnOrder& OutputColumnOrder;
+ IComputationExternalNode* const CurrentRowIndexArg;
+ const TVector<IComputationNode*>& Defines;
const TContainerCacheOnContext& Cache;
};
@@ -248,10 +273,14 @@ private:
if (const auto flow = FlowDependsOn(InputFlow)) {
Own(flow, InputRowArg);
Own(flow, MatchedVarsArg);
+ Own(flow, CurrentRowIndexArg);
DependsOn(flow, PartitionKey);
for (auto& m: Measures) {
DependsOn(flow, m);
}
+ for (auto& d: Defines) {
+ DependsOn(flow, d);
+ }
}
}
@@ -260,8 +289,10 @@ private:
IComputationNode* const PartitionKey;
TType* const PartitionKeyType;
IComputationExternalNode* const MatchedVarsArg;
- std::vector<IComputationNode*> Measures;
+ TVector<IComputationNode*> Measures;
TOutputColumnOrder OutputColumnOrder;
+ IComputationExternalNode* const CurrentRowIndexArg;
+ const TVector<IComputationNode*> Defines;
const TContainerCacheOnContext Cache;
};
@@ -291,8 +322,33 @@ TOutputColumnOrder GetOutputColumnOrder(TRuntimeNode partitionKyeColumnsIndexes,
return result;
}
-std::vector<IComputationNode*> ConvertVectorOfCallables(const std::vector<TRuntimeNode>& v, const TComputationNodeFactoryContext& ctx) {
- std::vector<IComputationNode*> result;
+TRowPattern ConvertPattern(const TRuntimeNode& pattern) {
+ TVector<TRowPatternTerm> result;
+ const auto& inputPattern = AS_VALUE(TTupleLiteral, pattern);
+ for (ui32 i = 0; i != inputPattern->GetValuesCount(); ++i) {
+ const auto& inputTerm = AS_VALUE(TTupleLiteral, inputPattern->GetValue(i));
+ TVector<TRowPatternFactor> term;
+ for (ui32 j = 0; j != inputTerm->GetValuesCount(); ++j) {
+ const auto& inputFactor = AS_VALUE(TTupleLiteral, inputTerm->GetValue(j));
+ MKQL_ENSURE(inputFactor->GetValuesCount() == 5, "Internal logic error");
+ const auto& primary = inputFactor->GetValue(0);
+ term.push_back(TRowPatternFactor{
+ primary.IsImmediate() ?
+ TRowPatternPrimary(TString(AS_VALUE(TDataLiteral, primary)->AsValue().AsStringRef())) :
+ ConvertPattern(primary),
+ AS_VALUE(TDataLiteral, inputFactor->GetValue(1))->AsValue().Get<ui64>(),
+ AS_VALUE(TDataLiteral, inputFactor->GetValue(2))->AsValue().Get<ui64>(),
+ AS_VALUE(TDataLiteral, inputFactor->GetValue(3))->AsValue().Get<bool>(),
+ AS_VALUE(TDataLiteral, inputFactor->GetValue(4))->AsValue().Get<bool>()
+ });
+ }
+ result.push_back(std::move(term));
+ }
+ return result;
+}
+
+TVector<IComputationNode*> ConvertVectorOfCallables(const TVector<TRuntimeNode>& v, const TComputationNodeFactoryContext& ctx) {
+ TVector<IComputationNode*> result;
result.reserve(v.size());
for (auto& c: v) {
result.push_back(LocateNode(ctx.NodeLocator, *c.GetNode()));
@@ -300,10 +356,12 @@ std::vector<IComputationNode*> ConvertVectorOfCallables(const std::vector<TRunti
return result;
}
-} //namespace
+} //namespace NMatchRecognize
+
IComputationNode* WrapMatchRecognizeCore(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
+ using namespace NMatchRecognize;
size_t inputIndex = 0;
const auto& inputFlow = callable.GetInput(inputIndex++);
const auto& inputRowArg = callable.GetInput(inputIndex++);
@@ -314,16 +372,28 @@ IComputationNode* WrapMatchRecognizeCore(TCallable& callable, const TComputation
const auto& inputRowColumnCount = callable.GetInput(inputIndex++);
const auto& matchedVarsArg = callable.GetInput(inputIndex++);
const auto& measureColumnIndexes = callable.GetInput(inputIndex++);
- std::vector<TRuntimeNode> measures;
+ TVector<TRuntimeNode> measures;
for (size_t i = 0; i != AS_VALUE(TListLiteral, measureColumnIndexes)->GetItemsCount(); ++i) {
measures.push_back(callable.GetInput(inputIndex++));
}
+ const auto& pattern = callable.GetInput(inputIndex++);
+ const auto& currentRowIndexArg = callable.GetInput(inputIndex++);
+ const auto& defineInputDataArg = callable.GetInput(inputIndex++);
+ const auto& defineNames = callable.GetInput(inputIndex++);
+ TVector<TRuntimeNode> defines ;
+ for (size_t i = 0; i != AS_VALUE(TListLiteral, defineNames)->GetItemsCount(); ++i) {
+ defines.push_back(callable.GetInput(inputIndex++));
+ }
MKQL_ENSURE(callable.GetInputsCount() == inputIndex, "Wrong input count");
+
Y_UNUSED(measureInputDataArg);
Y_UNUSED(measureSpecialColumnIndexes);
Y_UNUSED(inputRowColumnCount);
+ Y_UNUSED(pattern);
+ Y_UNUSED(defineInputDataArg);
- return new TMatchRecognizeWrapper(ctx.Mutables, GetValueRepresentation(inputFlow.GetStaticType())
+ return new TMatchRecognizeWrapper(ctx.Mutables
+ , GetValueRepresentation(inputFlow.GetStaticType())
, LocateNode(ctx.NodeLocator, *inputFlow.GetNode())
, static_cast<IComputationExternalNode*>(LocateNode(ctx.NodeLocator, *inputRowArg.GetNode()))
, LocateNode(ctx.NodeLocator, *partitionKeySelector.GetNode())
@@ -331,6 +401,8 @@ IComputationNode* WrapMatchRecognizeCore(TCallable& callable, const TComputation
, static_cast<IComputationExternalNode*>(LocateNode(ctx.NodeLocator, *matchedVarsArg.GetNode()))
, ConvertVectorOfCallables(measures, ctx)
, GetOutputColumnOrder(partitionColumnIndexes, measureColumnIndexes)
+ , static_cast<IComputationExternalNode*>(LocateNode(ctx.NodeLocator, *currentRowIndexArg.GetNode()))
+ , ConvertVectorOfCallables(defines, ctx)
);
}
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h
index 91ae930f6f8..cb585571934 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h
@@ -92,4 +92,5 @@ public:
private:
const std::vector<TMatchedVar>& Vars;
};
+
}//namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/ydb/library/yql/minikql/mkql_program_builder.cpp b/ydb/library/yql/minikql/mkql_program_builder.cpp
index 27be8ff034e..0904c0d3b93 100644
--- a/ydb/library/yql/minikql/mkql_program_builder.cpp
+++ b/ydb/library/yql/minikql/mkql_program_builder.cpp
@@ -5791,11 +5791,39 @@ TRuntimeNode TProgramBuilder::ScalarApply(const TArrayRef<const TRuntimeNode>& a
return TRuntimeNode(builder.Build(), false);
}
+namespace {
+using namespace NYql::NMatchRecognize;
+TRuntimeNode PatternToRuntimeNode(const TRowPattern& pattern, const TProgramBuilder& programBuilder) {
+ const auto& env = programBuilder.GetTypeEnvironment();
+ TTupleLiteralBuilder patternBuilder(env);
+ for (const auto& term: pattern) {
+ TTupleLiteralBuilder termBuilder(env);
+ for (const auto& factor: term) {
+ TStructLiteralBuilder factorBuilder(env);
+ factorBuilder.Add("Primary", factor.Primary.index() == 0 ?
+ programBuilder.NewDataLiteral<NUdf::EDataSlot::String>(std::get<0>(factor.Primary)) :
+ PatternToRuntimeNode(std::get<1>(factor.Primary), programBuilder)
+ );
+ factorBuilder.Add("QuantityMin", programBuilder.NewDataLiteral<ui64>(factor.QuantityMin));
+ factorBuilder.Add("QuantityMax", programBuilder.NewDataLiteral<ui64>(factor.QuantityMax));
+ factorBuilder.Add("Greedy", programBuilder.NewDataLiteral<bool>(factor.Greedy));
+ factorBuilder.Add("Output", programBuilder.NewDataLiteral<bool>(factor.Output));
+ termBuilder.Add({factorBuilder.Build(), true});
+ }
+ patternBuilder.Add({termBuilder.Build(), true});
+ }
+ return {patternBuilder.Build(), true};
+};
+
+} //namespace
+
TRuntimeNode TProgramBuilder::MatchRecognizeCore(
TRuntimeNode inputStream,
const TUnaryLambda& getPartitionKeySelectorNode,
const TArrayRef<TStringBuf>& partitionColumns,
- const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures
+ const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures,
+ const NYql::NMatchRecognize::TRowPattern& pattern,
+ const TArrayRef<std::pair<TStringBuf, TTernaryLambda>>& getDefines
) {
MKQL_ENSURE(RuntimeVersion >= 42, "MatchRecognize is not supported in runtime version " << RuntimeVersion);
@@ -5808,8 +5836,9 @@ TRuntimeNode TProgramBuilder::MatchRecognizeCore(
indexRangeTypeBuilder.Add("To", TDataType::Create(NUdf::TDataType<ui64>::Id, Env));
const auto& rangeList = TListType::Create(indexRangeTypeBuilder.Build(), Env);
TStructTypeBuilder matchedVarsTypeBuilder(Env);
- //assume simple pattern with one var "A" that always match TODO fixme
- matchedVarsTypeBuilder.Add("A", rangeList);
+ for (const auto& var: GetPatternVars(pattern)) {
+ matchedVarsTypeBuilder.Add(var, rangeList);
+ }
TRuntimeNode matchedVarsArg = Arg(matchedVarsTypeBuilder.Build());
//---These vars may be empty in case of no measures
@@ -5889,6 +5918,35 @@ TRuntimeNode TProgramBuilder::MatchRecognizeCore(
}
auto outputType = (TType*)TFlowType::Create(outputRowType, Env);
+ THashMap<TStringBuf , size_t> patternVarLookup;
+ for (ui32 i = 0; i != AS_TYPE(TStructType, matchedVarsArg.GetStaticType())->GetMembersCount(); ++i){
+ patternVarLookup[AS_TYPE(TStructType, matchedVarsArg.GetStaticType())->GetMemberName(i)] = i;
+ }
+
+ THashMap<TStringBuf , size_t> defineLookup;
+ for (size_t i = 0; i != getDefines.size(); ++i) {
+ defineLookup[getDefines[i].first] = i;
+ }
+ std::vector<TRuntimeNode> defineNames(patternVarLookup.size());
+ std::vector<TRuntimeNode> defineNodes(patternVarLookup.size());
+
+ const auto& defineInputDataArg = Arg(TListType::Create(inputRowType, Env));
+ const auto& currentRowIndexArg = Arg(TDataType::Create(NUdf::TDataType<ui64>::Id, Env));
+ for (const auto& [v, i]: patternVarLookup) {
+ defineNames[i] = NewDataLiteral<NUdf::EDataSlot::String>(v);
+ if (const auto it = defineLookup.find(v); it != defineLookup.end()) {
+ defineNodes[i] = getDefines[it->second].second(defineInputDataArg, matchedVarsArg, currentRowIndexArg);
+ }
+ else { //no predicate for var
+ if ("$" == v || "^" == v) {
+ //DO nothing, //will be handled in a specific way
+ }
+ else { // a var without a predicate matches any row
+ defineNodes[i] = NewDataLiteral<bool>(true);
+ }
+ }
+ }
+
TCallableBuilder callableBuilder(GetTypeEnvironment(), "MatchRecognizeCore", outputType);
auto indexType = TDataType::Create(NUdf::TDataType<ui32>::Id, Env);
auto indexListType = TListType::Create(indexType, Env);
@@ -5908,6 +5966,16 @@ TRuntimeNode TProgramBuilder::MatchRecognizeCore(
for (const auto& m: measures) {
callableBuilder.Add(m);
}
+
+ callableBuilder.Add(PatternToRuntimeNode(pattern, *this));
+
+ callableBuilder.Add(currentRowIndexArg);
+ callableBuilder.Add(defineInputDataArg);
+ const auto stringType = NewDataType(NUdf::EDataSlot::String);
+ callableBuilder.Add(TRuntimeNode(TListLiteral::Create(defineNames.begin(), defineNames.size(), TListType::Create(stringType, Env), Env), true));
+ for (const auto& d: defineNodes) {
+ callableBuilder.Add(d);
+ }
return TRuntimeNode(callableBuilder.Build(), false);
}
diff --git a/ydb/library/yql/minikql/mkql_program_builder.h b/ydb/library/yql/minikql/mkql_program_builder.h
index 92aefa416c6..3e9a951327f 100644
--- a/ydb/library/yql/minikql/mkql_program_builder.h
+++ b/ydb/library/yql/minikql/mkql_program_builder.h
@@ -700,7 +700,9 @@ public:
TRuntimeNode inputStream,
const TUnaryLambda& getPartitionKeySelectorNode,
const TArrayRef<TStringBuf>& partitionColumns,
- const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures
+ const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures,
+ const NYql::NMatchRecognize::TRowPattern& pattern,
+ const TArrayRef<std::pair<TStringBuf, TTernaryLambda>>& getDefines
);
protected:
diff --git a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp
index ce0b02f4dad..a3ede6f1668 100644
--- a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp
+++ b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp
@@ -5,6 +5,8 @@
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+#include <ydb/library/yql/core/yql_match_recognize.h>
+
#include <ydb/library/yql/core/yql_join.h>
#include <ydb/library/yql/core/yql_opt_utils.h>
@@ -860,11 +862,17 @@ TMkqlCommonCallableCompiler::TShared::TShared() {
//explore params
const auto& measures = params->ChildRef(0);
+ const auto& pattern = params->ChildRef(3);
+ const auto& defines = params->ChildRef(4);
//explore measures
const auto measureNames = measures->ChildRef(2);
constexpr size_t FirstMeasureLambdaIndex = 3;
+ //explore defines
+ const auto defineNames = defines->ChildRef(2);
+ const size_t FirstDefineLambdaIndex = 3;
+
TVector<TStringBuf> partitionColumnNames;
for (const auto& n: partitionColumns->Children()) {
partitionColumnNames.push_back(n->Content());
@@ -874,6 +882,17 @@ TMkqlCommonCallableCompiler::TShared::TShared() {
return MkqlBuildLambda(*partitionKeySelector, ctx, {inputRowArg});
};
+ TVector<std::pair<TStringBuf, TProgramBuilder::TTernaryLambda>> getDefines(defineNames->ChildrenSize());
+ for (size_t i = 0; i != defineNames->ChildrenSize(); ++i) {
+ getDefines[i] = std::pair{
+ defineNames->ChildRef(i)->Content(),
+ [i, defines, &ctx](TRuntimeNode data, TRuntimeNode matchedVars, TRuntimeNode rowIndex) {
+ return MkqlBuildLambda(*defines->ChildRef(FirstDefineLambdaIndex + i), ctx,
+ {data, matchedVars, rowIndex});
+ }
+ };
+ }
+
TVector<std::pair<TStringBuf, TProgramBuilder::TBinaryLambda>> getMeasures(measureNames->ChildrenSize());
for (size_t i = 0; i != measureNames->ChildrenSize(); ++i) {
getMeasures[i] = std::pair{
@@ -889,7 +908,9 @@ TMkqlCommonCallableCompiler::TShared::TShared() {
MkqlBuildExpr(*inputStream, ctx),
getPartitionKeySelector,
partitionColumnNames,
- getMeasures
+ getMeasures,
+ NYql::NMatchRecognize::ConvertPattern(pattern, ctx.ExprCtx),
+ getDefines
);
});
diff --git a/ydb/library/yql/sql/v1/match_recognize.cpp b/ydb/library/yql/sql/v1/match_recognize.cpp
index 54e52aee634..5906a709ad7 100644
--- a/ydb/library/yql/sql/v1/match_recognize.cpp
+++ b/ydb/library/yql/sql/v1/match_recognize.cpp
@@ -14,7 +14,7 @@ public:
std::pair<TPosition, TVector<TNamedLambda>>&& measures,
std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, TRowPatternPtr>&& pattern,
+ std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
std::pair<TPosition, TVector<TNamedLambda>>&& definitions
): TAstListNode(pos, {BuildAtom(pos, "block")})
@@ -43,7 +43,7 @@ private:
std::pair<TPosition, TVector<TNamedLambda>>&& measures,
std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, TRowPatternPtr>&& pattern,
+ std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
std::pair<TPosition, TVector<TNamedLambda>>&& definitions
) {
@@ -110,7 +110,7 @@ private:
));
}
- TPtr PatternFactor(const TPosition& pos, const TRowPatternFactor& factor) {
+ TPtr PatternFactor(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternFactor& factor) {
return BuildTuple(pos, {
factor.Primary.index() == 0 ?
BuildQuotedAtom(pos, std::get<0>(factor.Primary)) :
@@ -123,16 +123,16 @@ private:
}
- TPtr PatternTerm(const TPosition& pos, const TRowPatternTerm& term) {
+ TPtr PatternTerm(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternTerm& term) {
auto factors = Y();
for (const auto& f: term)
factors->Add(PatternFactor(pos, f));
return Q(std::move(factors));
}
- TPtr Pattern(const TPosition& pos, const TRowPatternPtr& pattern) {
+ TPtr Pattern(const TPosition& pos, const NYql::NMatchRecognize::TRowPattern& pattern) {
TNodePtr patternNode = Y("MatchRecognizePattern");
- for (const auto& t: pattern->Terms) {
+ for (const auto& t: pattern) {
patternNode->Add(PatternTerm(pos, t));
}
return patternNode;
diff --git a/ydb/library/yql/sql/v1/match_recognize.h b/ydb/library/yql/sql/v1/match_recognize.h
index 190849dab1f..101ac432c49 100644
--- a/ydb/library/yql/sql/v1/match_recognize.h
+++ b/ydb/library/yql/sql/v1/match_recognize.h
@@ -1,5 +1,6 @@
#pragma once
#include "node.h"
+#include <ydb/library/yql/core/sql_types/match_recognize.h>
#include <util/generic/ptr.h>
namespace NSQLTranslationV1 {
@@ -36,26 +37,6 @@ struct TAfterMatchSkipTo {
TString Var;
};
-struct TRowPattern;
-
-using TRowPatternPtr = std::unique_ptr<TRowPattern>;
-
-using TRowPatternPrimary = std::variant<TString, TRowPatternPtr>;
-
-struct TRowPatternFactor{
- TRowPatternPrimary Primary;
- uint64_t QuantityMin;
- uint64_t QuantityMax;
- bool Greedy;
- bool Output; //include in output with ALL ROW PER MATCH
-};
-
-using TRowPatternTerm = std::vector<TRowPatternFactor>;
-
-struct TRowPattern {
- std::vector<TRowPatternTerm> Terms;
-};
-
class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> {
public:
TMatchRecognizeBuilder(
@@ -65,7 +46,7 @@ public:
std::pair<TPosition, TVector<TNamedLambda>>&& measures,
std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, TRowPatternPtr>&& pattern,
+ std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
std::pair<TPosition, TVector<TNamedLambda>>&& definitions
)
@@ -88,11 +69,11 @@ private:
std::pair<TPosition, TVector<TNamedLambda>> Measures;
std::pair<TPosition, ERowsPerMatch> RowsPerMatch;
std::pair<TPosition, TAfterMatchSkipTo> SkipTo;
- std::pair<TPosition, TRowPatternPtr> Pattern;
+ std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern;
std::pair<TPosition, TNodePtr> Subset;
std::pair<TPosition, TVector<TNamedLambda>> Definitions;
};
using TMatchRecognizeBuilderPtr=TIntrusivePtr<TMatchRecognizeBuilder> ;
-} // namespace NSQLTranslationV1 \ No newline at end of file
+} // namespace NSQLTranslationV1
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp
index 3b05ce64093..a0413a6eb95 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp
+++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp
@@ -17,22 +17,6 @@ TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecogni
return Id(node.GetRule_identifier1(), ctx);
}
-std::unordered_set<TString> GetAllPatternVars(const TRowPatternPtr& pattern){
- std::unordered_set<TString> result;
- for (const auto& t: pattern->Terms) {
- for (const auto& f: t) {
- if (f.Primary.index() == 0) {
- result.insert(std::get<0>(f.Primary));
- }
- else {
- auto nested = GetAllPatternVars(std::get<1>(f.Primary));
- result.insert(nested.cbegin(), nested.cend());
- }
- }
- }
- return result;
-}
-
} //namespace
TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) {
@@ -101,7 +85,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge
EAfterMatchSkipTo::ToLast == skipTo.second.To ||
EAfterMatchSkipTo::To == skipTo.second.To;
if (varRequired) {
- const auto& allVars = GetAllPatternVars(pattern);
+ const auto& allVars = NYql::NMatchRecognize::GetPatternVars(pattern);
if (allVars.find(skipTo.second.Var) == allVars.cend()) {
Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH";
return {};
@@ -234,12 +218,12 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc
}
}
-TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){
- TRowPatternTerm term;
+NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){
+ NYql::NMatchRecognize::TRowPatternTerm term;
TPosition pos;
for (const auto& factor: node.GetBlock1()) {
const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1();
- TRowPatternPrimary primary;
+ NYql::NMatchRecognize::TRowPatternPrimary primary;
bool output = true;
switch(primaryVar.GetAltCase()){
case TRule_row_pattern_primary::kAltRowPatternPrimary1:
@@ -259,7 +243,7 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte
} else {
Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
<< "To big nesting level in the pattern";
- return TRowPatternTerm{};
+ return NYql::NMatchRecognize::TRowPatternTerm{};
}
break;
}
@@ -318,17 +302,17 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte
Y_FAIL("You should change implementation according to grammar changes");
}
}
- term.push_back(TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output});
+ term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output});
}
return term;
}
-TRowPatternPtr TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){
- TVector<TRowPatternTerm> result;
- result.emplace_back(ParsePatternTerm(node.GetRule_row_pattern_term1()));
+NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){
+ TVector<NYql::NMatchRecognize::TRowPatternTerm> result;
+ result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1()));
for (const auto& term: node.GetBlock2())
result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2()));
- return std::make_unique<TRowPattern>(TRowPattern{std::move(result)});
+ return result;
}
TNamedLambda TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.h b/ydb/library/yql/sql/v1/sql_match_recognize.h
index 65a49d2162d..cd32223675b 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize.h
+++ b/ydb/library/yql/sql/v1/sql_match_recognize.h
@@ -19,12 +19,12 @@ private:
TVector<TNamedLambda> ParseMeasures(const TRule_row_pattern_measure_list& node);
std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause);
std::pair<TPosition, TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause);
- TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node);
- TRowPatternPtr ParsePattern(const TRule_row_pattern& node);
+ NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node);
+ NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node);
TNamedLambda ParseOneDefinition(const TRule_row_pattern_definition& node);
TVector<TNamedLambda> ParseDefinitions(const TRule_row_pattern_definition_list& node);
private:
size_t PatternNestingLevel = 0;
};
-} // namespace NSQLTranslationV1 \ No newline at end of file
+} // namespace NSQLTranslationV1
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
index c66c96d79e3..2606380b05e 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
+++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
@@ -1,6 +1,7 @@
#include "sql_ut.h"
#include "match_recognize.h"
#include <ydb/library/yql/providers/common/provider/yql_provider_names.h>
+#include <ydb/library/yql/core/sql_types/match_recognize.h>
#include <ydb/library/yql/sql/sql.h>
#include <util/generic/map.h>
@@ -426,7 +427,7 @@ FROM Input MATCH_RECOGNIZE(
auto getTheFactor = [](const NYql::TAstNode* root) {
const auto& patternCallable = FindMatchRecognizeParam(root, "pattern");
const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
- return NSQLTranslationV1::TRowPatternFactor{
+ return NYql::NMatchRecognize::TRowPatternFactor{
TString(), //primary var or subexpression, not used in this test
FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin
FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax
diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt
index 17379347b27..80dcef14ef0 100644
--- a/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt
@@ -18,6 +18,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC
cpp-testing-unittest_main
yql-sql-v1
udf-service-exception_policy
+ yql-core-sql_types
library-yql-sql
yql-sql-pg_dummy
sql-v1-format
diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt
index e752dcf4475..54041a09026 100644
--- a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt
@@ -18,6 +18,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC
cpp-testing-unittest_main
yql-sql-v1
udf-service-exception_policy
+ yql-core-sql_types
library-yql-sql
yql-sql-pg_dummy
sql-v1-format
diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt
index cbe9969c009..0cda4c5e185 100644
--- a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt
@@ -19,6 +19,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC
cpp-testing-unittest_main
yql-sql-v1
udf-service-exception_policy
+ yql-core-sql_types
library-yql-sql
yql-sql-pg_dummy
sql-v1-format
diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt
index 2352e1c720f..0e8469627af 100644
--- a/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt
@@ -18,6 +18,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC
cpp-testing-unittest_main
yql-sql-v1
udf-service-exception_policy
+ yql-core-sql_types
library-yql-sql
yql-sql-pg_dummy
sql-v1-format
diff --git a/ydb/library/yql/sql/v1/ut/ya.make b/ydb/library/yql/sql/v1/ut/ya.make
index c3a04bc05c5..0d1b12449bf 100644
--- a/ydb/library/yql/sql/v1/ut/ya.make
+++ b/ydb/library/yql/sql/v1/ut/ya.make
@@ -7,6 +7,7 @@ SRCS(
PEERDIR(
ydb/library/yql/public/udf/service/exception_policy
+ ydb/library/yql/core/sql_types
ydb/library/yql/sql
ydb/library/yql/sql/pg_dummy
ydb/library/yql/sql/v1/format