diff options
author | zverevgeny <zverevgeny@ydb.tech> | 2023-09-01 16:42:24 +0300 |
---|---|---|
committer | zverevgeny <zverevgeny@ydb.tech> | 2023-09-01 17:13:20 +0300 |
commit | cb9faf27a068c2b006c6711252fdebc20a9377dd (patch) | |
tree | 8ccab2c9a118f9e854da7c10c89f77454e189d4d | |
parent | 09a74b181d913ec129c5af5d071fca27b5ac62e6 (diff) | |
download | ydb-cb9faf27a068c2b006c6711252fdebc20a9377dd.tar.gz |
YQL-16325 MATCH_RECOGNIZE, pass defines & pattern to computation
32 files changed, 652 insertions, 115 deletions
diff --git a/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt index a715d1f3c1f..5d0fdac0e62 100644 --- a/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/core/sql_types/CMakeLists.darwin-x86_64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. +add_subdirectory(ut) get_built_tool_path( TOOL_enum_parser_bin TOOL_enum_parser_dependency @@ -20,6 +21,7 @@ target_link_libraries(yql-core-sql_types PUBLIC tools-enum_parser-enum_serialization_runtime ) target_sources(yql-core-sql_types PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp ) generate_enum_serilization(yql-core-sql_types diff --git a/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt index 54472b4296f..f709108d6b2 100644 --- a/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/core/sql_types/CMakeLists.linux-aarch64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. +add_subdirectory(ut) get_built_tool_path( TOOL_enum_parser_bin TOOL_enum_parser_dependency @@ -21,6 +22,7 @@ target_link_libraries(yql-core-sql_types PUBLIC tools-enum_parser-enum_serialization_runtime ) target_sources(yql-core-sql_types PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp ) generate_enum_serilization(yql-core-sql_types diff --git a/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt index 54472b4296f..f709108d6b2 100644 --- a/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/core/sql_types/CMakeLists.linux-x86_64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. +add_subdirectory(ut) get_built_tool_path( TOOL_enum_parser_bin TOOL_enum_parser_dependency @@ -21,6 +22,7 @@ target_link_libraries(yql-core-sql_types PUBLIC tools-enum_parser-enum_serialization_runtime ) target_sources(yql-core-sql_types PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp ) generate_enum_serilization(yql-core-sql_types diff --git a/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt index a715d1f3c1f..5d0fdac0e62 100644 --- a/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/core/sql_types/CMakeLists.windows-x86_64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. +add_subdirectory(ut) get_built_tool_path( TOOL_enum_parser_bin TOOL_enum_parser_dependency @@ -20,6 +21,7 @@ target_link_libraries(yql-core-sql_types PUBLIC tools-enum_parser-enum_serialization_runtime ) target_sources(yql-core-sql_types PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/simple_types.cpp ) generate_enum_serilization(yql-core-sql_types diff --git a/ydb/library/yql/core/sql_types/match_recognize.cpp b/ydb/library/yql/core/sql_types/match_recognize.cpp new file mode 100644 index 00000000000..5eee7448269 --- /dev/null +++ b/ydb/library/yql/core/sql_types/match_recognize.cpp @@ -0,0 +1,23 @@ +#include "match_recognize.h" +#include <util/generic/string.h> +#include <util/generic/hash_set.h> + +namespace NYql::NMatchRecognize { + +THashSet<TString> GetPatternVars(const TRowPattern& pattern) { + THashSet<TString> result; + for (const auto& term: pattern) { + for (const auto& factor: term) { + const auto& primary = factor.Primary; + if (primary.index() == 0){ + result.insert(std::get<0>(primary)); + } else { + const auto& vars = GetPatternVars(std::get<1>(primary)); + result.insert(vars.begin(), vars.end()); + } + } + } + return result; +} + +}//namespace NYql::NMatchRecognize diff --git a/ydb/library/yql/core/sql_types/match_recognize.h b/ydb/library/yql/core/sql_types/match_recognize.h index fc2d6881ff3..dc02eb0302d 100644 --- a/ydb/library/yql/core/sql_types/match_recognize.h +++ b/ydb/library/yql/core/sql_types/match_recognize.h @@ -1,6 +1,9 @@ #pragma once #include <util/generic/string.h> #include <util/string/cast.h> +#include <util/generic/vector.h> +#include <util/generic/hash_set.h> +#include <variant> #include <stddef.h> namespace NYql::NMatchRecognize { @@ -18,4 +21,22 @@ inline TString MeasureInputDataSpecialColumnName(MeasureInputDataSpecialColumns return TString("_yql_") + ToString(c); } +struct TRowPatternFactor; + +using TRowPatternTerm = TVector<TRowPatternFactor>; + +using TRowPattern = TVector<TRowPatternTerm>; + +using TRowPatternPrimary = std::variant<TString, TRowPattern>; + +struct TRowPatternFactor { + TRowPatternPrimary Primary; + uint64_t QuantityMin; + uint64_t QuantityMax; + bool Greedy; + bool Output; //include in output with ALL ROW PER MATCH +}; + +THashSet<TString> GetPatternVars(const TRowPattern&); + }//namespace NYql::NMatchRecognize diff --git a/ydb/library/yql/core/sql_types/match_recognize_ut.cpp b/ydb/library/yql/core/sql_types/match_recognize_ut.cpp new file mode 100644 index 00000000000..1f1d6811417 --- /dev/null +++ b/ydb/library/yql/core/sql_types/match_recognize_ut.cpp @@ -0,0 +1,39 @@ +#include "match_recognize.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NYql::NMatchRecognize; + +Y_UNIT_TEST_SUITE(MatchRecognizePattern){ + auto factorVar = [](const TString & v) { return TRowPatternFactor{v, 0, 0, false, false};}; + auto factorExpr = [](const TRowPattern& p) { return TRowPatternFactor{p, 0, 0, false, false};}; + auto expected = [](std::initializer_list<TString> list) { return THashSet<TString>(list); }; + Y_UNIT_TEST(SingleVarPattern) { + const TRowPattern pattern = {{factorVar("A")}}; + UNIT_ASSERT_VALUES_EQUAL(expected({"A"}), GetPatternVars(pattern)); + } + Y_UNIT_TEST(DistinctVarsPattern) { + const TRowPattern pattern = {{factorVar("A"), factorVar("B"), factorVar("C")}}; + UNIT_ASSERT_VALUES_EQUAL(expected({"A", "B", "C"}), GetPatternVars(pattern)); + } + Y_UNIT_TEST(RepeatedVarsPattern) { + const TRowPattern pattern = {{factorVar("A"), factorVar("B"), factorVar("B"), factorVar("C")}}; + UNIT_ASSERT_VALUES_EQUAL(expected({"A", "B", "C"}), GetPatternVars(pattern)); + } + Y_UNIT_TEST(NestedPattern) { + const TRowPattern pattern = { + {factorVar("A"), factorVar("B"), factorVar("B"), factorVar("C")}, + {factorVar("B"), factorVar("C"), factorVar("D"), factorExpr( + {{factorExpr({ + {factorVar("C"), factorVar("D"), factorVar("E")}, + {factorVar("F")} + })}} + ) + }, + {factorVar("C")}, + {factorVar("D")}, + {factorVar("E")} + }; + UNIT_ASSERT_VALUES_EQUAL(expected({"A", "B", "C", "D", "E", "F"}), GetPatternVars(pattern)); + } +} diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..f48e3fb5186 --- /dev/null +++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,65 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-core-sql_types-ut) +target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types +) +target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + yql-core-sql_types +) +target_link_options(ydb-library-yql-core-sql_types-ut PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC +) +target_sources(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp +) +set_property( + TARGET + ydb-library-yql-core-sql_types-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-core-sql_types-ut + TEST_TARGET + ydb-library-yql-core-sql_types-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + LABELS + SMALL +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-core-sql_types-ut + system_allocator +) +vcs_info(ydb-library-yql-core-sql_types-ut) diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..4e43cbcc6d3 --- /dev/null +++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-aarch64.txt @@ -0,0 +1,70 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-core-sql_types-ut) +target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types +) +target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-testing-unittest_main + yql-core-sql_types +) +target_link_options(ydb-library-yql-core-sql_types-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp +) +set_property( + TARGET + ydb-library-yql-core-sql_types-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-core-sql_types-ut + TEST_TARGET + ydb-library-yql-core-sql_types-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + LABELS + SMALL +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-core-sql_types-ut + cpp-malloc-jemalloc +) +vcs_info(ydb-library-yql-core-sql_types-ut) diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..c360cf20127 --- /dev/null +++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.linux-x86_64.txt @@ -0,0 +1,72 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-core-sql_types-ut) +target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types +) +target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + yql-core-sql_types +) +target_link_options(ydb-library-yql-core-sql_types-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp +) +set_property( + TARGET + ydb-library-yql-core-sql_types-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-core-sql_types-ut + TEST_TARGET + ydb-library-yql-core-sql_types-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + LABELS + SMALL +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-core-sql_types-ut + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) +vcs_info(ydb-library-yql-core-sql_types-ut) diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.txt new file mode 100644 index 00000000000..f8b31df0c11 --- /dev/null +++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..6e38dd6b7b6 --- /dev/null +++ b/ydb/library/yql/core/sql_types/ut/CMakeLists.windows-x86_64.txt @@ -0,0 +1,60 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-core-sql_types-ut) +target_include_directories(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types +) +target_link_libraries(ydb-library-yql-core-sql_types-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + yql-core-sql_types +) +target_sources(ydb-library-yql-core-sql_types-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/sql_types/match_recognize_ut.cpp +) +set_property( + TARGET + ydb-library-yql-core-sql_types-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-core-sql_types-ut + TEST_TARGET + ydb-library-yql-core-sql_types-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + LABELS + SMALL +) +set_yunittest_property( + TEST + ydb-library-yql-core-sql_types-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-core-sql_types-ut + system_allocator +) +vcs_info(ydb-library-yql-core-sql_types-ut) diff --git a/ydb/library/yql/core/sql_types/ut/ya.make b/ydb/library/yql/core/sql_types/ut/ya.make new file mode 100644 index 00000000000..f6f5ddd20d1 --- /dev/null +++ b/ydb/library/yql/core/sql_types/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(ydb/library/yql/core/sql_types) +SRCS( + match_recognize_ut.cpp +) + +PEERDIR( + ydb/library/yql/core/sql_types +) + +SIZE(SMALL) + +END() diff --git a/ydb/library/yql/core/sql_types/ya.make b/ydb/library/yql/core/sql_types/ya.make index 2ccf3b837d8..ad018c89957 100644 --- a/ydb/library/yql/core/sql_types/ya.make +++ b/ydb/library/yql/core/sql_types/ya.make @@ -2,9 +2,14 @@ LIBRARY() SRCS( match_recognize.h + match_recognize.cpp simple_types.h simple_types.cpp ) GENERATE_ENUM_SERIALIZATION(match_recognize.h) END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp b/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp index 2f985dec3a5..081d2209cae 100644 --- a/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_match_recognize.cpp @@ -1,5 +1,6 @@ #include "type_ann_match_recognize.h" #include <ydb/library/yql/core/sql_types/match_recognize.h> +#include <ydb/library/yql/core/yql_match_recognize.h> namespace NYql::NTypeAnnImpl { @@ -54,26 +55,8 @@ MatchRecognizeParamsWrapper(const TExprNode::TPtr &input, TExprNode::TPtr &outpu namespace { - -const std::unordered_set<TString> GetPrimaryVars(const TExprNode::TPtr& pattern, TContext &ctx, size_t nestingLevel) { - std::unordered_set<TString> result; - for (const auto& term: pattern->Children()) { - for (const auto& factor: term->Children()) { - YQL_ENSURE(EnsureArgsCount(*factor, 5, ctx.Expr), "Expect 5 args"); - if (factor->ChildRef(0)->IsAtom()) { - result.insert(TString(factor->ChildRef(0)->Content())); - } else { - YQL_ENSURE(nestingLevel < NYql::NMatchRecognize::MaxPatternNesting, "To big nesting level in the pattern"); - auto subExprVars = GetPrimaryVars(factor->ChildRef(0), ctx, ++nestingLevel); - result.insert(subExprVars.begin(), subExprVars.end()); - } - } - } - return result; -} - const TStructExprType* GetMatchedRowsRangesType(const TExprNode::TPtr& pattern, TContext &ctx) { - auto vars = GetPrimaryVars(pattern, ctx, 0); + auto vars = GetPatternVars(NYql::NMatchRecognize::ConvertPattern(pattern, ctx.Expr, 0)); TVector<const TItemExprType*> items; for (const auto& var: vars) { const auto& item = ctx.Expr.MakeType<TStructExprType>(TVector<const TItemExprType*>{ diff --git a/ydb/library/yql/core/yql_match_recognize.h b/ydb/library/yql/core/yql_match_recognize.h new file mode 100644 index 00000000000..3afefb06ba7 --- /dev/null +++ b/ydb/library/yql/core/yql_match_recognize.h @@ -0,0 +1,28 @@ +#pragma once +#include <ydb/library/yql/core/sql_types/match_recognize.h> +#include "ydb/library/yql/ast/yql_expr.h" + +namespace NYql::NMatchRecognize { + +inline TRowPattern ConvertPattern(const TExprNode::TPtr& pattern, TExprContext &ctx, size_t nestingLevel = 0) { + YQL_ENSURE(nestingLevel <= MaxPatternNesting, "To big nesting level in the pattern"); + TRowPattern result; + for (const auto& term: pattern->Children()) { + result.push_back(TRowPatternTerm{}); + for (const auto& factor: term->Children()) { + YQL_ENSURE(factor->ChildrenSize() == 5, "Expect 5 args"); + result.back().push_back(TRowPatternFactor{ + factor->ChildRef(0)->IsAtom() ? + TRowPatternPrimary(TString(factor->ChildRef(0)->Content())) : + ConvertPattern(factor->ChildRef(0), ctx, ++nestingLevel), + FromString<ui64>(factor->ChildRef(1)->Content()), + FromString<ui64>(factor->ChildRef(2)->Content()), + FromString<bool>(factor->ChildRef(3)->Content()), + FromString<bool>(factor->ChildRef(4)->Content()) + }); + } + } + return result; +} + +} //namespace NYql::NMatchRecognize diff --git a/ydb/library/yql/core/yql_opt_match_recognize.cpp b/ydb/library/yql/core/yql_opt_match_recognize.cpp index f7cc70ca3d8..11d5cb08c94 100644 --- a/ydb/library/yql/core/yql_opt_match_recognize.cpp +++ b/ydb/library/yql/core/yql_opt_match_recognize.cpp @@ -71,5 +71,4 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext& return result; } - } //namespace NYql diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index dd42271bdd8..74504eb06f7 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -8,20 +8,20 @@ namespace NKikimr::NMiniKQL { -namespace { +namespace NMatchRecognize { enum class EMeasureColumnSource {Classifier = 0, MatchNumber = 1, Input}; -using TMeasureInputColumnOrder = std::vector<std::pair<EMeasureColumnSource, size_t>>; +using TMeasureInputColumnOrder = TVector<std::pair<EMeasureColumnSource, size_t>>; enum class EOutputColumnSource {PartitionKey, Measure}; -using TOutputColumnOrder = std::vector<std::pair<EOutputColumnSource, size_t>>; +using TOutputColumnOrder = TVector<std::pair<EOutputColumnSource, size_t>>; -using namespace NMatchRecognize; +using namespace NYql::NMatchRecognize; //Process one partition of input data struct IProcessMatchRecognize { ///return true if it has output data ready - virtual bool ProcessInputRow(NUdf::TUnboxedValue&& row) = 0; + virtual bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) = 0; virtual NUdf::TUnboxedValue GetOutputIfReady(TComputationContext& ctx) = 0; virtual bool ProcessEndOfData() = 0; virtual ~IProcessMatchRecognize(){} @@ -32,34 +32,42 @@ public: TStreamMatchRecognize( NUdf::TUnboxedValue&& partitionKey, IComputationExternalNode* matchedVarsArg, - std::vector<IComputationNode*>& measures, + TVector<IComputationNode*>& measures, const TOutputColumnOrder& outputColumnOrder, + IComputationExternalNode* currentRowIndexArg, + const TVector<IComputationNode*>& defines, const TContainerCacheOnContext& cache ) : PartitionKey(std::move(partitionKey)) , MatchedVarsArg(matchedVarsArg) , Measures(measures) , OutputColumnOrder(outputColumnOrder) + , CurrentRowIndexArg(currentRowIndexArg) + , Defines(defines) , Cache(cache) - , MatchedVars(2) //Assume pattern (A B B)*, where A matches every 3rd row (i%3 == 0) and B matches the rest + , MatchedVars(Defines.size()) , HasMatch(false) , RowCount(0) { } - bool ProcessInputRow(NUdf::TUnboxedValue&& row) override{ + bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) override{ Y_UNUSED(row); - //Assume pattern (A B B)*, where A matches every 3rd row (i%3 == 0) and B matches the rest - switch (RowCount % 3) { - case 0: - MatchedVars[0].push_back({RowCount, RowCount}); - break; - case 1: - MatchedVars[1].push_back({RowCount, RowCount}); - break; - case 2: - MatchedVars[1].back().second++; - break; + CurrentRowIndexArg->SetValue(ctx, NUdf::TUnboxedValuePod(RowCount)); + for (size_t i = 0; i != Defines.size(); ++i) { + const auto& d = Defines[i]->GetValue(ctx); + if (d && d.GetOptionalValue().Get<bool>()) { + auto& var = MatchedVars[i]; + if (var.empty()) { + var.emplace_back(RowCount, RowCount); + } + else if (var.back().second + 1 == RowCount) { + ++var.back().second; + } + else { + var.emplace_back(RowCount, RowCount); + } + } } ++RowCount; return HasMatch; @@ -86,14 +94,17 @@ public: return result; } bool ProcessEndOfData() override { + //Assume match at the the end of each partition HasMatch = true; return HasMatch; } private: const NUdf::TUnboxedValue PartitionKey; IComputationExternalNode* const MatchedVarsArg; - const std::vector<IComputationNode*>& Measures; + const TVector<IComputationNode*>& Measures; const TOutputColumnOrder& OutputColumnOrder; + IComputationExternalNode* const CurrentRowIndexArg; + const TVector<IComputationNode*>& Defines; const TContainerCacheOnContext& Cache; TMatchedVars MatchedVars; bool HasMatch; @@ -109,8 +120,10 @@ public: IComputationNode *partitionKey, TType* partitionKeyType, IComputationExternalNode* matchedVarsArg, - std::vector<IComputationNode*>&& measures, - TOutputColumnOrder&& outputColumnOrder + TVector<IComputationNode*>&& measures, + TOutputColumnOrder&& outputColumnOrder, + IComputationExternalNode* currentRowIndexArg, + const TVector<IComputationNode*>& defines ) :TBaseComputation(mutables, inputFlow, kind, EValueRepresentation::Embedded) , InputFlow(inputFlow) @@ -120,6 +133,8 @@ public: , MatchedVarsArg(matchedVarsArg) , Measures(measures) , OutputColumnOrder(outputColumnOrder) + , CurrentRowIndexArg(currentRowIndexArg) + , Defines(defines) , Cache(mutables) {} @@ -132,6 +147,8 @@ public: MatchedVarsArg, Measures, OutputColumnOrder, + CurrentRowIndexArg, + Defines, Cache ); } @@ -161,8 +178,10 @@ private: IComputationNode* partitionKey, TType* partitionKeyType, IComputationExternalNode* matchedVarsArg, - const std::vector<IComputationNode*>& measures, + const TVector<IComputationNode*>& measures, const TOutputColumnOrder& outputColumnOrder, + IComputationExternalNode* currentRowIndexArg, + const TVector<IComputationNode*>& defines, const TContainerCacheOnContext& cache ) : TComputationValue<TState>(memInfo) @@ -172,13 +191,15 @@ private: , MatchedVarsArg(matchedVarsArg) , Measures(measures) , OutputColumnOrder(outputColumnOrder) + , CurrentRowIndexArg(currentRowIndexArg) + , Defines(defines) , Cache(cache) { } void ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) { auto partition = GetPartitionHandler(row, ctx); - if (partition->second->ProcessInputRow(std::move(row))) { + if (partition->second->ProcessInputRow(std::move(row), ctx)) { HasReadyOutput.push(partition); } } @@ -220,6 +241,8 @@ private: MatchedVarsArg, Measures, OutputColumnOrder, + CurrentRowIndexArg, + Defines, Cache )); } @@ -238,8 +261,10 @@ private: //to be passed to partitions IComputationExternalNode* const MatchedVarsArg; - std::vector<IComputationNode*> Measures; + TVector<IComputationNode*> Measures; const TOutputColumnOrder& OutputColumnOrder; + IComputationExternalNode* const CurrentRowIndexArg; + const TVector<IComputationNode*>& Defines; const TContainerCacheOnContext& Cache; }; @@ -248,10 +273,14 @@ private: if (const auto flow = FlowDependsOn(InputFlow)) { Own(flow, InputRowArg); Own(flow, MatchedVarsArg); + Own(flow, CurrentRowIndexArg); DependsOn(flow, PartitionKey); for (auto& m: Measures) { DependsOn(flow, m); } + for (auto& d: Defines) { + DependsOn(flow, d); + } } } @@ -260,8 +289,10 @@ private: IComputationNode* const PartitionKey; TType* const PartitionKeyType; IComputationExternalNode* const MatchedVarsArg; - std::vector<IComputationNode*> Measures; + TVector<IComputationNode*> Measures; TOutputColumnOrder OutputColumnOrder; + IComputationExternalNode* const CurrentRowIndexArg; + const TVector<IComputationNode*> Defines; const TContainerCacheOnContext Cache; }; @@ -291,8 +322,33 @@ TOutputColumnOrder GetOutputColumnOrder(TRuntimeNode partitionKyeColumnsIndexes, return result; } -std::vector<IComputationNode*> ConvertVectorOfCallables(const std::vector<TRuntimeNode>& v, const TComputationNodeFactoryContext& ctx) { - std::vector<IComputationNode*> result; +TRowPattern ConvertPattern(const TRuntimeNode& pattern) { + TVector<TRowPatternTerm> result; + const auto& inputPattern = AS_VALUE(TTupleLiteral, pattern); + for (ui32 i = 0; i != inputPattern->GetValuesCount(); ++i) { + const auto& inputTerm = AS_VALUE(TTupleLiteral, inputPattern->GetValue(i)); + TVector<TRowPatternFactor> term; + for (ui32 j = 0; j != inputTerm->GetValuesCount(); ++j) { + const auto& inputFactor = AS_VALUE(TTupleLiteral, inputTerm->GetValue(j)); + MKQL_ENSURE(inputFactor->GetValuesCount() == 5, "Internal logic error"); + const auto& primary = inputFactor->GetValue(0); + term.push_back(TRowPatternFactor{ + primary.IsImmediate() ? + TRowPatternPrimary(TString(AS_VALUE(TDataLiteral, primary)->AsValue().AsStringRef())) : + ConvertPattern(primary), + AS_VALUE(TDataLiteral, inputFactor->GetValue(1))->AsValue().Get<ui64>(), + AS_VALUE(TDataLiteral, inputFactor->GetValue(2))->AsValue().Get<ui64>(), + AS_VALUE(TDataLiteral, inputFactor->GetValue(3))->AsValue().Get<bool>(), + AS_VALUE(TDataLiteral, inputFactor->GetValue(4))->AsValue().Get<bool>() + }); + } + result.push_back(std::move(term)); + } + return result; +} + +TVector<IComputationNode*> ConvertVectorOfCallables(const TVector<TRuntimeNode>& v, const TComputationNodeFactoryContext& ctx) { + TVector<IComputationNode*> result; result.reserve(v.size()); for (auto& c: v) { result.push_back(LocateNode(ctx.NodeLocator, *c.GetNode())); @@ -300,10 +356,12 @@ std::vector<IComputationNode*> ConvertVectorOfCallables(const std::vector<TRunti return result; } -} //namespace +} //namespace NMatchRecognize + IComputationNode* WrapMatchRecognizeCore(TCallable& callable, const TComputationNodeFactoryContext& ctx) { + using namespace NMatchRecognize; size_t inputIndex = 0; const auto& inputFlow = callable.GetInput(inputIndex++); const auto& inputRowArg = callable.GetInput(inputIndex++); @@ -314,16 +372,28 @@ IComputationNode* WrapMatchRecognizeCore(TCallable& callable, const TComputation const auto& inputRowColumnCount = callable.GetInput(inputIndex++); const auto& matchedVarsArg = callable.GetInput(inputIndex++); const auto& measureColumnIndexes = callable.GetInput(inputIndex++); - std::vector<TRuntimeNode> measures; + TVector<TRuntimeNode> measures; for (size_t i = 0; i != AS_VALUE(TListLiteral, measureColumnIndexes)->GetItemsCount(); ++i) { measures.push_back(callable.GetInput(inputIndex++)); } + const auto& pattern = callable.GetInput(inputIndex++); + const auto& currentRowIndexArg = callable.GetInput(inputIndex++); + const auto& defineInputDataArg = callable.GetInput(inputIndex++); + const auto& defineNames = callable.GetInput(inputIndex++); + TVector<TRuntimeNode> defines ; + for (size_t i = 0; i != AS_VALUE(TListLiteral, defineNames)->GetItemsCount(); ++i) { + defines.push_back(callable.GetInput(inputIndex++)); + } MKQL_ENSURE(callable.GetInputsCount() == inputIndex, "Wrong input count"); + Y_UNUSED(measureInputDataArg); Y_UNUSED(measureSpecialColumnIndexes); Y_UNUSED(inputRowColumnCount); + Y_UNUSED(pattern); + Y_UNUSED(defineInputDataArg); - return new TMatchRecognizeWrapper(ctx.Mutables, GetValueRepresentation(inputFlow.GetStaticType()) + return new TMatchRecognizeWrapper(ctx.Mutables + , GetValueRepresentation(inputFlow.GetStaticType()) , LocateNode(ctx.NodeLocator, *inputFlow.GetNode()) , static_cast<IComputationExternalNode*>(LocateNode(ctx.NodeLocator, *inputRowArg.GetNode())) , LocateNode(ctx.NodeLocator, *partitionKeySelector.GetNode()) @@ -331,6 +401,8 @@ IComputationNode* WrapMatchRecognizeCore(TCallable& callable, const TComputation , static_cast<IComputationExternalNode*>(LocateNode(ctx.NodeLocator, *matchedVarsArg.GetNode())) , ConvertVectorOfCallables(measures, ctx) , GetOutputColumnOrder(partitionColumnIndexes, measureColumnIndexes) + , static_cast<IComputationExternalNode*>(LocateNode(ctx.NodeLocator, *currentRowIndexArg.GetNode())) + , ConvertVectorOfCallables(defines, ctx) ); } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h index 91ae930f6f8..cb585571934 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_matched_vars.h @@ -92,4 +92,5 @@ public: private: const std::vector<TMatchedVar>& Vars; }; + }//namespace NKikimr::NMiniKQL::NMatchRecognize diff --git a/ydb/library/yql/minikql/mkql_program_builder.cpp b/ydb/library/yql/minikql/mkql_program_builder.cpp index 27be8ff034e..0904c0d3b93 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.cpp +++ b/ydb/library/yql/minikql/mkql_program_builder.cpp @@ -5791,11 +5791,39 @@ TRuntimeNode TProgramBuilder::ScalarApply(const TArrayRef<const TRuntimeNode>& a return TRuntimeNode(builder.Build(), false); } +namespace { +using namespace NYql::NMatchRecognize; +TRuntimeNode PatternToRuntimeNode(const TRowPattern& pattern, const TProgramBuilder& programBuilder) { + const auto& env = programBuilder.GetTypeEnvironment(); + TTupleLiteralBuilder patternBuilder(env); + for (const auto& term: pattern) { + TTupleLiteralBuilder termBuilder(env); + for (const auto& factor: term) { + TStructLiteralBuilder factorBuilder(env); + factorBuilder.Add("Primary", factor.Primary.index() == 0 ? + programBuilder.NewDataLiteral<NUdf::EDataSlot::String>(std::get<0>(factor.Primary)) : + PatternToRuntimeNode(std::get<1>(factor.Primary), programBuilder) + ); + factorBuilder.Add("QuantityMin", programBuilder.NewDataLiteral<ui64>(factor.QuantityMin)); + factorBuilder.Add("QuantityMax", programBuilder.NewDataLiteral<ui64>(factor.QuantityMax)); + factorBuilder.Add("Greedy", programBuilder.NewDataLiteral<bool>(factor.Greedy)); + factorBuilder.Add("Output", programBuilder.NewDataLiteral<bool>(factor.Output)); + termBuilder.Add({factorBuilder.Build(), true}); + } + patternBuilder.Add({termBuilder.Build(), true}); + } + return {patternBuilder.Build(), true}; +}; + +} //namespace + TRuntimeNode TProgramBuilder::MatchRecognizeCore( TRuntimeNode inputStream, const TUnaryLambda& getPartitionKeySelectorNode, const TArrayRef<TStringBuf>& partitionColumns, - const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures + const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures, + const NYql::NMatchRecognize::TRowPattern& pattern, + const TArrayRef<std::pair<TStringBuf, TTernaryLambda>>& getDefines ) { MKQL_ENSURE(RuntimeVersion >= 42, "MatchRecognize is not supported in runtime version " << RuntimeVersion); @@ -5808,8 +5836,9 @@ TRuntimeNode TProgramBuilder::MatchRecognizeCore( indexRangeTypeBuilder.Add("To", TDataType::Create(NUdf::TDataType<ui64>::Id, Env)); const auto& rangeList = TListType::Create(indexRangeTypeBuilder.Build(), Env); TStructTypeBuilder matchedVarsTypeBuilder(Env); - //assume simple pattern with one var "A" that always match TODO fixme - matchedVarsTypeBuilder.Add("A", rangeList); + for (const auto& var: GetPatternVars(pattern)) { + matchedVarsTypeBuilder.Add(var, rangeList); + } TRuntimeNode matchedVarsArg = Arg(matchedVarsTypeBuilder.Build()); //---These vars may be empty in case of no measures @@ -5889,6 +5918,35 @@ TRuntimeNode TProgramBuilder::MatchRecognizeCore( } auto outputType = (TType*)TFlowType::Create(outputRowType, Env); + THashMap<TStringBuf , size_t> patternVarLookup; + for (ui32 i = 0; i != AS_TYPE(TStructType, matchedVarsArg.GetStaticType())->GetMembersCount(); ++i){ + patternVarLookup[AS_TYPE(TStructType, matchedVarsArg.GetStaticType())->GetMemberName(i)] = i; + } + + THashMap<TStringBuf , size_t> defineLookup; + for (size_t i = 0; i != getDefines.size(); ++i) { + defineLookup[getDefines[i].first] = i; + } + std::vector<TRuntimeNode> defineNames(patternVarLookup.size()); + std::vector<TRuntimeNode> defineNodes(patternVarLookup.size()); + + const auto& defineInputDataArg = Arg(TListType::Create(inputRowType, Env)); + const auto& currentRowIndexArg = Arg(TDataType::Create(NUdf::TDataType<ui64>::Id, Env)); + for (const auto& [v, i]: patternVarLookup) { + defineNames[i] = NewDataLiteral<NUdf::EDataSlot::String>(v); + if (const auto it = defineLookup.find(v); it != defineLookup.end()) { + defineNodes[i] = getDefines[it->second].second(defineInputDataArg, matchedVarsArg, currentRowIndexArg); + } + else { //no predicate for var + if ("$" == v || "^" == v) { + //DO nothing, //will be handled in a specific way + } + else { // a var without a predicate matches any row + defineNodes[i] = NewDataLiteral<bool>(true); + } + } + } + TCallableBuilder callableBuilder(GetTypeEnvironment(), "MatchRecognizeCore", outputType); auto indexType = TDataType::Create(NUdf::TDataType<ui32>::Id, Env); auto indexListType = TListType::Create(indexType, Env); @@ -5908,6 +5966,16 @@ TRuntimeNode TProgramBuilder::MatchRecognizeCore( for (const auto& m: measures) { callableBuilder.Add(m); } + + callableBuilder.Add(PatternToRuntimeNode(pattern, *this)); + + callableBuilder.Add(currentRowIndexArg); + callableBuilder.Add(defineInputDataArg); + const auto stringType = NewDataType(NUdf::EDataSlot::String); + callableBuilder.Add(TRuntimeNode(TListLiteral::Create(defineNames.begin(), defineNames.size(), TListType::Create(stringType, Env), Env), true)); + for (const auto& d: defineNodes) { + callableBuilder.Add(d); + } return TRuntimeNode(callableBuilder.Build(), false); } diff --git a/ydb/library/yql/minikql/mkql_program_builder.h b/ydb/library/yql/minikql/mkql_program_builder.h index 92aefa416c6..3e9a951327f 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.h +++ b/ydb/library/yql/minikql/mkql_program_builder.h @@ -700,7 +700,9 @@ public: TRuntimeNode inputStream, const TUnaryLambda& getPartitionKeySelectorNode, const TArrayRef<TStringBuf>& partitionColumns, - const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures + const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures, + const NYql::NMatchRecognize::TRowPattern& pattern, + const TArrayRef<std::pair<TStringBuf, TTernaryLambda>>& getDefines ); protected: diff --git a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp index ce0b02f4dad..a3ede6f1668 100644 --- a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp +++ b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp @@ -5,6 +5,8 @@ #include <ydb/library/yql/core/yql_expr_type_annotation.h> #include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h> #include <ydb/library/yql/core/yql_expr_type_annotation.h> +#include <ydb/library/yql/core/yql_match_recognize.h> + #include <ydb/library/yql/core/yql_join.h> #include <ydb/library/yql/core/yql_opt_utils.h> @@ -860,11 +862,17 @@ TMkqlCommonCallableCompiler::TShared::TShared() { //explore params const auto& measures = params->ChildRef(0); + const auto& pattern = params->ChildRef(3); + const auto& defines = params->ChildRef(4); //explore measures const auto measureNames = measures->ChildRef(2); constexpr size_t FirstMeasureLambdaIndex = 3; + //explore defines + const auto defineNames = defines->ChildRef(2); + const size_t FirstDefineLambdaIndex = 3; + TVector<TStringBuf> partitionColumnNames; for (const auto& n: partitionColumns->Children()) { partitionColumnNames.push_back(n->Content()); @@ -874,6 +882,17 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return MkqlBuildLambda(*partitionKeySelector, ctx, {inputRowArg}); }; + TVector<std::pair<TStringBuf, TProgramBuilder::TTernaryLambda>> getDefines(defineNames->ChildrenSize()); + for (size_t i = 0; i != defineNames->ChildrenSize(); ++i) { + getDefines[i] = std::pair{ + defineNames->ChildRef(i)->Content(), + [i, defines, &ctx](TRuntimeNode data, TRuntimeNode matchedVars, TRuntimeNode rowIndex) { + return MkqlBuildLambda(*defines->ChildRef(FirstDefineLambdaIndex + i), ctx, + {data, matchedVars, rowIndex}); + } + }; + } + TVector<std::pair<TStringBuf, TProgramBuilder::TBinaryLambda>> getMeasures(measureNames->ChildrenSize()); for (size_t i = 0; i != measureNames->ChildrenSize(); ++i) { getMeasures[i] = std::pair{ @@ -889,7 +908,9 @@ TMkqlCommonCallableCompiler::TShared::TShared() { MkqlBuildExpr(*inputStream, ctx), getPartitionKeySelector, partitionColumnNames, - getMeasures + getMeasures, + NYql::NMatchRecognize::ConvertPattern(pattern, ctx.ExprCtx), + getDefines ); }); diff --git a/ydb/library/yql/sql/v1/match_recognize.cpp b/ydb/library/yql/sql/v1/match_recognize.cpp index 54e52aee634..5906a709ad7 100644 --- a/ydb/library/yql/sql/v1/match_recognize.cpp +++ b/ydb/library/yql/sql/v1/match_recognize.cpp @@ -14,7 +14,7 @@ public: std::pair<TPosition, TVector<TNamedLambda>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, - std::pair<TPosition, TRowPatternPtr>&& pattern, + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, std::pair<TPosition, TVector<TNamedLambda>>&& definitions ): TAstListNode(pos, {BuildAtom(pos, "block")}) @@ -43,7 +43,7 @@ private: std::pair<TPosition, TVector<TNamedLambda>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, - std::pair<TPosition, TRowPatternPtr>&& pattern, + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, std::pair<TPosition, TVector<TNamedLambda>>&& definitions ) { @@ -110,7 +110,7 @@ private: )); } - TPtr PatternFactor(const TPosition& pos, const TRowPatternFactor& factor) { + TPtr PatternFactor(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternFactor& factor) { return BuildTuple(pos, { factor.Primary.index() == 0 ? BuildQuotedAtom(pos, std::get<0>(factor.Primary)) : @@ -123,16 +123,16 @@ private: } - TPtr PatternTerm(const TPosition& pos, const TRowPatternTerm& term) { + TPtr PatternTerm(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternTerm& term) { auto factors = Y(); for (const auto& f: term) factors->Add(PatternFactor(pos, f)); return Q(std::move(factors)); } - TPtr Pattern(const TPosition& pos, const TRowPatternPtr& pattern) { + TPtr Pattern(const TPosition& pos, const NYql::NMatchRecognize::TRowPattern& pattern) { TNodePtr patternNode = Y("MatchRecognizePattern"); - for (const auto& t: pattern->Terms) { + for (const auto& t: pattern) { patternNode->Add(PatternTerm(pos, t)); } return patternNode; diff --git a/ydb/library/yql/sql/v1/match_recognize.h b/ydb/library/yql/sql/v1/match_recognize.h index 190849dab1f..101ac432c49 100644 --- a/ydb/library/yql/sql/v1/match_recognize.h +++ b/ydb/library/yql/sql/v1/match_recognize.h @@ -1,5 +1,6 @@ #pragma once #include "node.h" +#include <ydb/library/yql/core/sql_types/match_recognize.h> #include <util/generic/ptr.h> namespace NSQLTranslationV1 { @@ -36,26 +37,6 @@ struct TAfterMatchSkipTo { TString Var; }; -struct TRowPattern; - -using TRowPatternPtr = std::unique_ptr<TRowPattern>; - -using TRowPatternPrimary = std::variant<TString, TRowPatternPtr>; - -struct TRowPatternFactor{ - TRowPatternPrimary Primary; - uint64_t QuantityMin; - uint64_t QuantityMax; - bool Greedy; - bool Output; //include in output with ALL ROW PER MATCH -}; - -using TRowPatternTerm = std::vector<TRowPatternFactor>; - -struct TRowPattern { - std::vector<TRowPatternTerm> Terms; -}; - class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> { public: TMatchRecognizeBuilder( @@ -65,7 +46,7 @@ public: std::pair<TPosition, TVector<TNamedLambda>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, - std::pair<TPosition, TRowPatternPtr>&& pattern, + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, std::pair<TPosition, TVector<TNamedLambda>>&& definitions ) @@ -88,11 +69,11 @@ private: std::pair<TPosition, TVector<TNamedLambda>> Measures; std::pair<TPosition, ERowsPerMatch> RowsPerMatch; std::pair<TPosition, TAfterMatchSkipTo> SkipTo; - std::pair<TPosition, TRowPatternPtr> Pattern; + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern; std::pair<TPosition, TNodePtr> Subset; std::pair<TPosition, TVector<TNamedLambda>> Definitions; }; using TMatchRecognizeBuilderPtr=TIntrusivePtr<TMatchRecognizeBuilder> ; -} // namespace NSQLTranslationV1
\ No newline at end of file +} // namespace NSQLTranslationV1 diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp index 3b05ce64093..a0413a6eb95 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp @@ -17,22 +17,6 @@ TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecogni return Id(node.GetRule_identifier1(), ctx); } -std::unordered_set<TString> GetAllPatternVars(const TRowPatternPtr& pattern){ - std::unordered_set<TString> result; - for (const auto& t: pattern->Terms) { - for (const auto& f: t) { - if (f.Primary.index() == 0) { - result.insert(std::get<0>(f.Primary)); - } - else { - auto nested = GetAllPatternVars(std::get<1>(f.Primary)); - result.insert(nested.cbegin(), nested.cend()); - } - } - } - return result; -} - } //namespace TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) { @@ -101,7 +85,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge EAfterMatchSkipTo::ToLast == skipTo.second.To || EAfterMatchSkipTo::To == skipTo.second.To; if (varRequired) { - const auto& allVars = GetAllPatternVars(pattern); + const auto& allVars = NYql::NMatchRecognize::GetPatternVars(pattern); if (allVars.find(skipTo.second.Var) == allVars.cend()) { Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH"; return {}; @@ -234,12 +218,12 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc } } -TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){ - TRowPatternTerm term; +NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){ + NYql::NMatchRecognize::TRowPatternTerm term; TPosition pos; for (const auto& factor: node.GetBlock1()) { const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1(); - TRowPatternPrimary primary; + NYql::NMatchRecognize::TRowPatternPrimary primary; bool output = true; switch(primaryVar.GetAltCase()){ case TRule_row_pattern_primary::kAltRowPatternPrimary1: @@ -259,7 +243,7 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte } else { Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) << "To big nesting level in the pattern"; - return TRowPatternTerm{}; + return NYql::NMatchRecognize::TRowPatternTerm{}; } break; } @@ -318,17 +302,17 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte Y_FAIL("You should change implementation according to grammar changes"); } } - term.push_back(TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output}); + term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output}); } return term; } -TRowPatternPtr TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){ - TVector<TRowPatternTerm> result; - result.emplace_back(ParsePatternTerm(node.GetRule_row_pattern_term1())); +NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){ + TVector<NYql::NMatchRecognize::TRowPatternTerm> result; + result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1())); for (const auto& term: node.GetBlock2()) result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2())); - return std::make_unique<TRowPattern>(TRowPattern{std::move(result)}); + return result; } TNamedLambda TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){ diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.h b/ydb/library/yql/sql/v1/sql_match_recognize.h index 65a49d2162d..cd32223675b 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.h +++ b/ydb/library/yql/sql/v1/sql_match_recognize.h @@ -19,12 +19,12 @@ private: TVector<TNamedLambda> ParseMeasures(const TRule_row_pattern_measure_list& node); std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause); std::pair<TPosition, TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause); - TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node); - TRowPatternPtr ParsePattern(const TRule_row_pattern& node); + NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node); + NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node); TNamedLambda ParseOneDefinition(const TRule_row_pattern_definition& node); TVector<TNamedLambda> ParseDefinitions(const TRule_row_pattern_definition_list& node); private: size_t PatternNestingLevel = 0; }; -} // namespace NSQLTranslationV1
\ No newline at end of file +} // namespace NSQLTranslationV1 diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp index c66c96d79e3..2606380b05e 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp @@ -1,6 +1,7 @@ #include "sql_ut.h" #include "match_recognize.h" #include <ydb/library/yql/providers/common/provider/yql_provider_names.h> +#include <ydb/library/yql/core/sql_types/match_recognize.h> #include <ydb/library/yql/sql/sql.h> #include <util/generic/map.h> @@ -426,7 +427,7 @@ FROM Input MATCH_RECOGNIZE( auto getTheFactor = [](const NYql::TAstNode* root) { const auto& patternCallable = FindMatchRecognizeParam(root, "pattern"); const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1); - return NSQLTranslationV1::TRowPatternFactor{ + return NYql::NMatchRecognize::TRowPatternFactor{ TString(), //primary var or subexpression, not used in this test FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt index 17379347b27..80dcef14ef0 100644 --- a/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/sql/v1/ut/CMakeLists.darwin-x86_64.txt @@ -18,6 +18,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC cpp-testing-unittest_main yql-sql-v1 udf-service-exception_policy + yql-core-sql_types library-yql-sql yql-sql-pg_dummy sql-v1-format diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt index e752dcf4475..54041a09026 100644 --- a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-aarch64.txt @@ -18,6 +18,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC cpp-testing-unittest_main yql-sql-v1 udf-service-exception_policy + yql-core-sql_types library-yql-sql yql-sql-pg_dummy sql-v1-format diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt index cbe9969c009..0cda4c5e185 100644 --- a/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/sql/v1/ut/CMakeLists.linux-x86_64.txt @@ -19,6 +19,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC cpp-testing-unittest_main yql-sql-v1 udf-service-exception_policy + yql-core-sql_types library-yql-sql yql-sql-pg_dummy sql-v1-format diff --git a/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt index 2352e1c720f..0e8469627af 100644 --- a/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/sql/v1/ut/CMakeLists.windows-x86_64.txt @@ -18,6 +18,7 @@ target_link_libraries(ydb-library-yql-sql-v1-ut PUBLIC cpp-testing-unittest_main yql-sql-v1 udf-service-exception_policy + yql-core-sql_types library-yql-sql yql-sql-pg_dummy sql-v1-format diff --git a/ydb/library/yql/sql/v1/ut/ya.make b/ydb/library/yql/sql/v1/ut/ya.make index c3a04bc05c5..0d1b12449bf 100644 --- a/ydb/library/yql/sql/v1/ut/ya.make +++ b/ydb/library/yql/sql/v1/ut/ya.make @@ -7,6 +7,7 @@ SRCS( PEERDIR( ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/core/sql_types ydb/library/yql/sql ydb/library/yql/sql/pg_dummy ydb/library/yql/sql/v1/format |