diff options
author | zverevgeny <zverevgeny@ydb.tech> | 2023-08-11 13:26:18 +0300 |
---|---|---|
committer | zverevgeny <zverevgeny@ydb.tech> | 2023-08-11 15:05:14 +0300 |
commit | 95d881de13521f9f5d4310978920044743695017 (patch) | |
tree | 5b9c7b81459adb4115dae675de0b69c6d4fc0296 | |
parent | f2dd52a9b24467877c3de53794427a4f05bd60fd (diff) | |
download | ydb-95d881de13521f9f5d4310978920044743695017.tar.gz |
YQL-16186 enable parentheses in row pattern
-rw-r--r-- | ydb/library/yql/sql/v1/match_recognize.cpp | 26 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/match_recognize.h | 27 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize.cpp | 36 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize.h | 4 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp | 84 |
5 files changed, 125 insertions, 52 deletions
diff --git a/ydb/library/yql/sql/v1/match_recognize.cpp b/ydb/library/yql/sql/v1/match_recognize.cpp index cffc5045325..73df93e7964 100644 --- a/ydb/library/yql/sql/v1/match_recognize.cpp +++ b/ydb/library/yql/sql/v1/match_recognize.cpp @@ -14,7 +14,7 @@ public: std::pair<TPosition, TVector<TNamedLambda>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, - std::pair<TPosition, TVector<TRowPatternTerm>>&& pattern, + std::pair<TPosition, TRowPatternPtr>&& pattern, std::pair<TPosition, TNodePtr>&& subset, std::pair<TPosition, TVector<TNamedLambda>>&& definitions ): TCallNode(pos, "block", {BuildBlockStatements( @@ -41,18 +41,15 @@ private: std::pair<TPosition, TVector<TNamedLambda>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, - std::pair<TPosition, TVector<TRowPatternTerm>>&& pattern, + std::pair<TPosition, TRowPatternPtr>&& pattern, std::pair<TPosition, TNodePtr>&& subset, std::pair<TPosition, TVector<TNamedLambda>>&& definitions ) { Y_UNUSED(pos); auto inputRowType = Y("ListItemType",Y("TypeOf", inputTable)); - TNodePtr patternNode = Y(); - for (const auto& t: pattern.second) { - patternNode->Add(PatternTerm(pos, t)); - } - patternNode = Q(patternNode); + + auto patternNode = Pattern(pattern.first, pattern.second); auto partitionColumns = Y(); for (const auto& p: partitioners.second){ @@ -113,7 +110,9 @@ private: TPtr PatternFactor(const TPosition& pos, const TRowPatternFactor& factor) { return BuildTuple(pos, { - BuildQuotedAtom(pos, factor.Name), + factor.Primary.index() == 0 ? 
+ BuildQuotedAtom(pos, std::get<0>(factor.Primary)) : + Pattern(pos, std::get<1>(factor.Primary)), BuildQuotedAtom(pos, ToString(factor.QuantityMin)), BuildQuotedAtom(pos, ToString(factor.QuantityMax)), BuildQuotedAtom(pos, ToString(factor.Greedy)), @@ -128,6 +127,15 @@ private: factors->Add(PatternFactor(pos, f)); return Q(std::move(factors)); } + + TPtr Pattern(const TPosition& pos, const TRowPatternPtr& pattern) { + TNodePtr patternNode = Y("MatchRecognizePattern"); + for (const auto& t: pattern->Terms) { + patternNode->Add(PatternTerm(pos, t)); + } + return patternNode; + } + TPtr DoClone() const final{ return new TMatchRecognize(*this); } @@ -153,4 +161,4 @@ TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISou return node; } -} // namespace NSQLTranslationV1
\ No newline at end of file +} // namespace NSQLTranslationV1 diff --git a/ydb/library/yql/sql/v1/match_recognize.h b/ydb/library/yql/sql/v1/match_recognize.h index 515916207f0..190849dab1f 100644 --- a/ydb/library/yql/sql/v1/match_recognize.h +++ b/ydb/library/yql/sql/v1/match_recognize.h @@ -36,16 +36,25 @@ struct TAfterMatchSkipTo { TString Var; }; +struct TRowPattern; + +using TRowPatternPtr = std::unique_ptr<TRowPattern>; + +using TRowPatternPrimary = std::variant<TString, TRowPatternPtr>; + struct TRowPatternFactor{ - TString Name; - uint64_t QuantityMin; //uint64 literal - uint64_t QuantityMax; //uint64 literal - bool Greedy; //bool literal; - bool Output; //bool literal, include in output with ALL ROW PER MATCH + TRowPatternPrimary Primary; + uint64_t QuantityMin; + uint64_t QuantityMax; + bool Greedy; + bool Output; //include in output with ALL ROW PER MATCH }; -using TRowPatternTerm = TVector<TRowPatternFactor>; -using TRowPattern = TVector<TRowPatternTerm>; +using TRowPatternTerm = std::vector<TRowPatternFactor>; + +struct TRowPattern { + std::vector<TRowPatternTerm> Terms; +}; class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> { public: @@ -56,7 +65,7 @@ public: std::pair<TPosition, TVector<TNamedLambda>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, - std::pair<TPosition, TRowPattern>&& pattern, + std::pair<TPosition, TRowPatternPtr>&& pattern, std::pair<TPosition, TNodePtr>&& subset, std::pair<TPosition, TVector<TNamedLambda>>&& definitions ) @@ -79,7 +88,7 @@ private: std::pair<TPosition, TVector<TNamedLambda>> Measures; std::pair<TPosition, ERowsPerMatch> RowsPerMatch; std::pair<TPosition, TAfterMatchSkipTo> SkipTo; - std::pair<TPosition, TRowPattern> Pattern; + std::pair<TPosition, TRowPatternPtr> Pattern; std::pair<TPosition, TNodePtr> Subset; std::pair<TPosition, TVector<TNamedLambda>> Definitions; }; diff --git 
a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp index dc292935393..a0c805fb17d 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp @@ -75,7 +75,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge return {}; } - const auto& pattern = ParsePattern(commonSyntax.GetRule_row_pattern5()); + auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5()); const auto& patternPos = TokenPosition(commonSyntax.token3()); TNodePtr subset; @@ -207,24 +207,31 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte TPosition pos; for (const auto& factor: node.GetBlock1()) { const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1(); - TString varName; + TRowPatternPrimary primary; bool output = true; switch(primaryVar.GetAltCase()){ case TRule_row_pattern_primary::kAltRowPatternPrimary1: - varName = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this); + primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this); break; case TRule_row_pattern_primary::kAltRowPatternPrimary2: - varName = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue(); - Y_ENSURE("$" == varName); + primary = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue(); + Y_ENSURE("$" == std::get<0>(primary)); break; case TRule_row_pattern_primary::kAltRowPatternPrimary3: - varName = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue(); - Y_ENSURE("^" == varName); + primary = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue(); + Y_ENSURE("^" == std::get<0>(primary)); break; - case TRule_row_pattern_primary::kAltRowPatternPrimary4: - 
Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) - << "Grouping is not supported yet"; //https://st.yandex-team.ru/YQL-16226 + case TRule_row_pattern_primary::kAltRowPatternPrimary4: { + constexpr size_t MaxNesting = 20; //Limit recursion + if (++PatternNestingLevel <= MaxNesting) { + primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1()); + } else { + Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) + << "To big nesting level in the pattern"; + return TRowPatternTerm{}; + } break; + } case TRule_row_pattern_primary::kAltRowPatternPrimary5: output = false; Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) @@ -280,16 +287,17 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte Y_FAIL("You should change implementation according to grammar changes"); } } - term.push_back(TRowPatternFactor{varName, quantityMin, quantityMax, greedy, output}); + term.push_back(TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output}); } return term; } -TVector<TRowPatternTerm> TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){ - TVector<TRowPatternTerm> result{ ParsePatternTerm(node.GetRule_row_pattern_term1()) }; +TRowPatternPtr TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){ + TVector<TRowPatternTerm> result; + result.emplace_back(ParsePatternTerm(node.GetRule_row_pattern_term1())); for (const auto& term: node.GetBlock2()) result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2())); - return result; + return std::make_unique<TRowPattern>(TRowPattern{std::move(result)}); } TNamedLambda TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){ diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.h b/ydb/library/yql/sql/v1/sql_match_recognize.h index 9433c5531b4..65a49d2162d 100644 --- 
a/ydb/library/yql/sql/v1/sql_match_recognize.h +++ b/ydb/library/yql/sql/v1/sql_match_recognize.h @@ -20,9 +20,11 @@ private: std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause); std::pair<TPosition, TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause); TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node); - TRowPattern ParsePattern(const TRule_row_pattern& node); + TRowPatternPtr ParsePattern(const TRule_row_pattern& node); TNamedLambda ParseOneDefinition(const TRule_row_pattern_definition& node); TVector<TNamedLambda> ParseDefinitions(const TRule_row_pattern_definition_list& node); +private: + size_t PatternNestingLevel = 0; }; } // namespace NSQLTranslationV1
\ No newline at end of file diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp index 2909bd90ea1..2a1982183e2 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp @@ -135,7 +135,7 @@ FROM Input MATCH_RECOGNIZE( //TODO https://st.yandex-team.ru/YQL-16186 } Y_UNIT_TEST(PatternSimple) { - auto stmt = R"( + const auto stmt = R"( USE plato; SELECT * FROM Input MATCH_RECOGNIZE( @@ -145,45 +145,91 @@ FROM Input MATCH_RECOGNIZE( )"; const auto& r = MatchRecognizeSqlToYql(stmt); UNIT_ASSERT(r.IsOk()); - auto pattern = FindMatchRecognizeParam(r.Root, "pattern"); - UNIT_ASSERT(IsQuotedListOfSize(pattern, 1)); - const auto& term = pattern->GetChild(1)->GetChild(0); + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 1); + const auto& term = patternCallable->GetChild(1); UNIT_ASSERT(IsQuotedListOfSize(term, 3)); } - Y_UNIT_TEST(PatternMedium) { - auto stmt = R"( + Y_UNIT_TEST(PatternMultiTerm) { + const auto stmt = R"( USE plato; SELECT * FROM Input MATCH_RECOGNIZE( - PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? ^) + PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? 
I J ^) DEFINE A as A ) )"; const auto& r = MatchRecognizeSqlToYql(stmt); UNIT_ASSERT(r.IsOk()); - auto pattern = FindMatchRecognizeParam(r.Root, "pattern"); - UNIT_ASSERT(IsQuotedListOfSize(pattern, 4)); + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 4); + const auto& lastTerm = patternCallable->GetChild(4); + UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 5)); } - //TODO add tests for factors, quantifiers and greediness https://st.yandex-team.ru/YQL-16186 - - - Y_UNIT_TEST(PatternDieHard) { - auto stmt = R"( + Y_UNIT_TEST(PatternWithParanthesis) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( + A | ($ B)+ C D + ) + DEFINE A as A + ) +)"; + const auto& r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 2); + const auto& firstTerm = patternCallable->GetChild(1); + UNIT_ASSERT(IsQuotedListOfSize(firstTerm, 1)); + const auto& lastTerm = patternCallable->GetChild(2); + UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3)); + const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0); + UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 5)); + const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0); + UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1); + UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT(IsQuotedListOfSize(nestedPattern->GetChild(1), 2)); + } + + Y_UNIT_TEST(PatternLimietedNesting) { + const size_t MaxNesting = 20; + for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) { + std::string pattern; + for (size_t i = 0; i != 
MaxNesting + extraNesting; ++i) + pattern.push_back('('); + pattern.push_back('A'); + for (size_t i = 0; i != MaxNesting + extraNesting; ++i) + pattern.push_back(')'); + const auto stmt = TString(R"( USE plato; SELECT * FROM Input MATCH_RECOGNIZE( - PATTERN (^ S1 S2*? ( {- S3 -} S4 )+ | PERMUTE(S1, S2){1,2} $) + PATTERN( +)") + pattern + R"( + ) DEFINE A as A ) )"; - Y_UNUSED(stmt); - //TODO implement me - //UNIT_ASSERT( MatchRecognizeSqlToYql(stmt).IsOk()); + const auto &r = MatchRecognizeSqlToYql(stmt); + if (not extraNesting) { + UNIT_ASSERT(r.IsOk()); + } else { + UNIT_ASSERT(not r.IsOk()); + } + } } - Y_UNIT_TEST(row_pattern_subset_clause) { + + //TODO add tests for factors, quantifiers and greediness https://st.yandex-team.ru/YQL-16186 + + Y_UNIT_TEST(row_pattern_subset_clause) { //TODO https://st.yandex-team.ru/YQL-16186 } |