diff options
author | vokayndzop <vokayndzop@yandex-team.com> | 2024-12-16 15:55:05 +0300 |
---|---|---|
committer | vokayndzop <vokayndzop@yandex-team.com> | 2024-12-16 16:34:36 +0300 |
commit | b1cde7dcb055fb6f3367e81fd0f57bd55b8bb93c (patch) | |
tree | 230bddb8bb4ce7d8290a16a4465ec98dbf513a5a /yql/essentials/sql | |
parent | 88e0ad5922cea1349ec1f8cbf133524cf865d696 (diff) | |
download | ydb-b1cde7dcb055fb6f3367e81fd0f57bd55b8bb93c.tar.gz |
MR: support ALL ROWS PER MATCH
commit_hash:9e2ba38d0d523bb870f6dc76717a3bec5d8ffadc
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- | yql/essentials/sql/v1/SQLv1.g.in | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/SQLv1Antlr4.g.in | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/match_recognize.cpp | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/match_recognize.h | 9 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_match_recognize.cpp | 42 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_match_recognize.h | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_match_recognize_ut.cpp | 2 |
7 files changed, 26 insertions, 43 deletions
diff --git a/yql/essentials/sql/v1/SQLv1.g.in b/yql/essentials/sql/v1/SQLv1.g.in index e9685c5094..670ad27e3e 100644 --- a/yql/essentials/sql/v1/SQLv1.g.in +++ b/yql/essentials/sql/v1/SQLv1.g.in @@ -460,7 +460,7 @@ row_pattern_primary: | DOLLAR | CARET | LPAREN row_pattern? RPAREN - | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY //TODO This rule accepts spaces between brace and minus sign, i.e: { - S2 - } that is not supposed to. Handle this case in https://st.yandex-team.ru/YQL-16227 + | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY | row_pattern_permute ; diff --git a/yql/essentials/sql/v1/SQLv1Antlr4.g.in b/yql/essentials/sql/v1/SQLv1Antlr4.g.in index 40593fe075..89131437e9 100644 --- a/yql/essentials/sql/v1/SQLv1Antlr4.g.in +++ b/yql/essentials/sql/v1/SQLv1Antlr4.g.in @@ -459,7 +459,7 @@ row_pattern_primary: | DOLLAR | CARET | LPAREN row_pattern? RPAREN - | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY //TODO This rule accepts spaces between brace and minus sign, i.e: { - S2 - } that is not supposed to. Handle this case in https://st.yandex-team.ru/YQL-16227 + | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY | row_pattern_permute ; diff --git a/yql/essentials/sql/v1/match_recognize.cpp b/yql/essentials/sql/v1/match_recognize.cpp index 47055e2f3d..84a20ae273 100644 --- a/yql/essentials/sql/v1/match_recognize.cpp +++ b/yql/essentials/sql/v1/match_recognize.cpp @@ -21,7 +21,7 @@ public: std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, std::pair<TPosition, TVector<TNamedFunction>>&& measures, - std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, + std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo, std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, @@ -56,7 +56,7 @@ private: std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, std::pair<TPosition, TVector<TNamedFunction>>&& measures, - std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, + std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo, std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, diff --git a/yql/essentials/sql/v1/match_recognize.h b/yql/essentials/sql/v1/match_recognize.h index b78c0faf65..4b0e98b9b7 100644 --- a/yql/essentials/sql/v1/match_recognize.h +++ b/yql/essentials/sql/v1/match_recognize.h @@ -10,11 +10,6 @@ struct TNamedFunction { TString name; }; -enum class ERowsPerMatch { - OneRow, - AllRows -}; - class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> { public: TMatchRecognizeBuilder( @@ -22,7 +17,7 @@ public: std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, std::pair<TPosition, TVector<TNamedFunction>>&& measures, - std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, + std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo, std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, @@ -45,7 +40,7 @@ private: std::pair<TPosition, TVector<TNamedFunction>> Partitioners; std::pair<TPosition, TVector<TSortSpecificationPtr>> SortSpecs; std::pair<TPosition, TVector<TNamedFunction>> Measures; - std::pair<TPosition, ERowsPerMatch> RowsPerMatch; + std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> RowsPerMatch; std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> SkipTo; std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern; std::pair<TPosition, TNodePtr> Subset; diff --git a/yql/essentials/sql/v1/sql_match_recognize.cpp b/yql/essentials/sql/v1/sql_match_recognize.cpp index 47e001efbb..41415b7f23 100644 --- a/yql/essentials/sql/v1/sql_match_recognize.cpp +++ b/yql/essentials/sql/v1/sql_match_recognize.cpp @@ -53,15 +53,9 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge measures = ParseMeasures(measuresClause.GetRule_row_pattern_measure_list2()); } - TPosition rowsPerMatchPos = pos; - ERowsPerMatch rowsPerMatch = ERowsPerMatch::OneRow; + auto rowsPerMatch = std::pair {pos, NYql::NMatchRecognize::ERowsPerMatch::OneRow}; if (matchRecognizeClause.HasBlock6()) { - std::tie(rowsPerMatchPos, rowsPerMatch) = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1()); - if (ERowsPerMatch::AllRows == rowsPerMatch) { - //https://st.yandex-team.ru/YQL-16213 - Ctx.Error(pos, TIssuesIds::CORE) << "ALL ROWS PER MATCH is not supported yet"; - return {}; - } + rowsPerMatch = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1()); } const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7(); @@ -126,7 +120,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge std::pair{partitionsPos, std::move(partitioners)}, std::pair{orderByPos, std::move(sortSpecs)}, std::pair{measuresPos, measures}, - std::pair{rowsPerMatchPos, rowsPerMatch}, + std::move(rowsPerMatch), std::move(skipTo), std::pair{patternPos, std::move(pattern)}, std::pair{subsetPos, std::move(subset)}, @@ -159,7 +153,6 @@ TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize); const auto& expr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1()); const auto& name = Id(node.GetRule_an_id3(), *this); - //TODO https://st.yandex-team.ru/YQL-16186 //Each measure must be a lambda, that accepts 2 args: // - List<InputTableColumns + _yql_Classifier, _yql_MatchNumber> // - Struct that maps row pattern variables to ranges in the queue @@ -174,18 +167,18 @@ TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_ return result; } -std::pair<TPosition, ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) { +std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) { switch(rowsPerMatchClause.GetAltCase()) { case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1: return std::pair { TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match1().GetToken1()), - ERowsPerMatch::OneRow + NYql::NMatchRecognize::ERowsPerMatch::OneRow }; case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2: return std::pair { TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match2().GetToken1()), - ERowsPerMatch::AllRows + NYql::NMatchRecognize::ERowsPerMatch::AllRows }; case TRule_row_pattern_rows_per_match::ALT_NOT_SET: Y_ABORT("You should change implementation according to grammar changes"); @@ -233,13 +226,13 @@ std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> TSqlMatchRecogniz } } -NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){ +NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool outputArg) { NYql::NMatchRecognize::TRowPatternTerm term; TPosition pos; for (const auto& factor: node.GetBlock1()) { const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1(); NYql::NMatchRecognize::TRowPatternPrimary primary; - bool output = true; + bool output = outputArg; switch (primaryVar.GetAltCase()) { case TRule_row_pattern_primary::kAltRowPatternPrimary1: primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this); @@ -253,9 +246,8 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer Y_ENSURE("^" == std::get<0>(primary)); break; case TRule_row_pattern_primary::kAltRowPatternPrimary4: { - if (++PatternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) { - primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1()); - --PatternNestingLevel; + if (patternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) { + primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1(), patternNestingLevel + 1, output); } else { Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) << "To big nesting level in the pattern"; @@ -265,15 +257,14 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer } case TRule_row_pattern_primary::kAltRowPatternPrimary5: output = false; - Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) - << "ALL ROWS PER MATCH and {- -} are not supported yet"; //https://st.yandex-team.ru/YQL-16227 + primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary5().GetRule_row_pattern3(), patternNestingLevel + 1, output); break; case TRule_row_pattern_primary::kAltRowPatternPrimary6: { std::vector<NYql::NMatchRecognize::TRowPatternPrimary> items{ParsePattern( - primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3()) + primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3(), patternNestingLevel + 1, output) }; for (const auto& p: primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetBlock4()) { - items.push_back(ParsePattern(p.GetRule_row_pattern2())); + items.push_back(ParsePattern(p.GetRule_row_pattern2(), patternNestingLevel + 1, output)); } //Permutations now is a syntactic sugar and converted to all possible alternatives if (items.size() > NYql::NMatchRecognize::MaxPermutedItems) { @@ -346,11 +337,11 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer return term; } -NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){ +NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel, bool output){ TVector<NYql::NMatchRecognize::TRowPatternTerm> result; - result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1())); + result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1(), patternNestingLevel, output)); for (const auto& term: node.GetBlock2()) - result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2())); + result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2(), patternNestingLevel, output)); return result; } @@ -364,7 +355,6 @@ TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_patt TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) { TVector<TNamedFunction> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())}; for (const auto& d: node.GetBlock2()) { - //TODO https://st.yandex-team.ru/YQL-16186 //Each define must be a predicate lambda, that accepts 3 args: // - List<input table rows> // - A struct that maps row pattern variables to ranges in the queue diff --git a/yql/essentials/sql/v1/sql_match_recognize.h b/yql/essentials/sql/v1/sql_match_recognize.h index 6766acc953..219baeaa09 100644 --- a/yql/essentials/sql/v1/sql_match_recognize.h +++ b/yql/essentials/sql/v1/sql_match_recognize.h @@ -17,14 +17,12 @@ private: TVector<TNamedFunction> ParsePartitionBy(const TRule_window_partition_clause& partitionClause); TNamedFunction ParseOneMeasure(const TRule_row_pattern_measure_definition& node); TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list& node); - std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause); + std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause); std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause); - NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node); - NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node); + NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool output); + NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel = 1, bool output = true); TNamedFunction ParseOneDefinition(const TRule_row_pattern_definition& node); TVector<TNamedFunction> ParseDefinitions(const TRule_row_pattern_definition_list& node); -private: - size_t PatternNestingLevel = 0; }; } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp index 20c5e6ab7b..f591ef0647 100644 --- a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp +++ b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp @@ -183,7 +183,7 @@ FROM Input MATCH_RECOGNIZE( ) )"; auto r = MatchRecognizeSqlToYql(stmt); - UNIT_ASSERT(not r.IsOk()); ///https://st.yandex-team.ru/YQL-16213 + UNIT_ASSERT(r.IsOk()); } { //default const auto stmt = R"( |