about | summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1
diff options
context:
space:
mode:
author: vokayndzop <vokayndzop@yandex-team.com> 2024-12-16 15:55:05 +0300
committer: vokayndzop <vokayndzop@yandex-team.com> 2024-12-16 16:34:36 +0300
commit: b1cde7dcb055fb6f3367e81fd0f57bd55b8bb93c (patch)
tree: 230bddb8bb4ce7d8290a16a4465ec98dbf513a5a /yql/essentials/sql/v1
parent: 88e0ad5922cea1349ec1f8cbf133524cf865d696 (diff)
download: ydb-b1cde7dcb055fb6f3367e81fd0f57bd55b8bb93c.tar.gz
MR: support ALL ROWS PER MATCH
commit_hash:9e2ba38d0d523bb870f6dc76717a3bec5d8ffadc
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- yql/essentials/sql/v1/SQLv1.g.in 2
-rw-r--r-- yql/essentials/sql/v1/SQLv1Antlr4.g.in 2
-rw-r--r-- yql/essentials/sql/v1/match_recognize.cpp 4
-rw-r--r-- yql/essentials/sql/v1/match_recognize.h 9
-rw-r--r-- yql/essentials/sql/v1/sql_match_recognize.cpp 42
-rw-r--r-- yql/essentials/sql/v1/sql_match_recognize.h 8
-rw-r--r-- yql/essentials/sql/v1/sql_match_recognize_ut.cpp 2
7 files changed, 26 insertions, 43 deletions
diff --git a/yql/essentials/sql/v1/SQLv1.g.in b/yql/essentials/sql/v1/SQLv1.g.in
index e9685c5094..670ad27e3e 100644
--- a/yql/essentials/sql/v1/SQLv1.g.in
+++ b/yql/essentials/sql/v1/SQLv1.g.in
@@ -460,7 +460,7 @@ row_pattern_primary:
| DOLLAR
| CARET
| LPAREN row_pattern? RPAREN
- | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY //TODO This rule accepts spaces between brace and minus sign, i.e: { - S2 - } that is not supposed to. Handle this case in https://st.yandex-team.ru/YQL-16227
+ | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY
| row_pattern_permute
;
diff --git a/yql/essentials/sql/v1/SQLv1Antlr4.g.in b/yql/essentials/sql/v1/SQLv1Antlr4.g.in
index 40593fe075..89131437e9 100644
--- a/yql/essentials/sql/v1/SQLv1Antlr4.g.in
+++ b/yql/essentials/sql/v1/SQLv1Antlr4.g.in
@@ -459,7 +459,7 @@ row_pattern_primary:
| DOLLAR
| CARET
| LPAREN row_pattern? RPAREN
- | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY //TODO This rule accepts spaces between brace and minus sign, i.e: { - S2 - } that is not supposed to. Handle this case in https://st.yandex-team.ru/YQL-16227
+ | LBRACE_CURLY MINUS row_pattern MINUS RBRACE_CURLY
| row_pattern_permute
;
diff --git a/yql/essentials/sql/v1/match_recognize.cpp b/yql/essentials/sql/v1/match_recognize.cpp
index 47055e2f3d..84a20ae273 100644
--- a/yql/essentials/sql/v1/match_recognize.cpp
+++ b/yql/essentials/sql/v1/match_recognize.cpp
@@ -21,7 +21,7 @@ public:
std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
std::pair<TPosition, TVector<TNamedFunction>>&& measures,
- std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
+ std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
@@ -56,7 +56,7 @@ private:
std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
std::pair<TPosition, TVector<TNamedFunction>>&& measures,
- std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
+ std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
diff --git a/yql/essentials/sql/v1/match_recognize.h b/yql/essentials/sql/v1/match_recognize.h
index b78c0faf65..4b0e98b9b7 100644
--- a/yql/essentials/sql/v1/match_recognize.h
+++ b/yql/essentials/sql/v1/match_recognize.h
@@ -10,11 +10,6 @@ struct TNamedFunction {
TString name;
};
-enum class ERowsPerMatch {
- OneRow,
- AllRows
-};
-
class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> {
public:
TMatchRecognizeBuilder(
@@ -22,7 +17,7 @@ public:
std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
std::pair<TPosition, TVector<TNamedFunction>>&& measures,
- std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
+ std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
@@ -45,7 +40,7 @@ private:
std::pair<TPosition, TVector<TNamedFunction>> Partitioners;
std::pair<TPosition, TVector<TSortSpecificationPtr>> SortSpecs;
std::pair<TPosition, TVector<TNamedFunction>> Measures;
- std::pair<TPosition, ERowsPerMatch> RowsPerMatch;
+ std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> RowsPerMatch;
std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> SkipTo;
std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern;
std::pair<TPosition, TNodePtr> Subset;
diff --git a/yql/essentials/sql/v1/sql_match_recognize.cpp b/yql/essentials/sql/v1/sql_match_recognize.cpp
index 47e001efbb..41415b7f23 100644
--- a/yql/essentials/sql/v1/sql_match_recognize.cpp
+++ b/yql/essentials/sql/v1/sql_match_recognize.cpp
@@ -53,15 +53,9 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge
measures = ParseMeasures(measuresClause.GetRule_row_pattern_measure_list2());
}
- TPosition rowsPerMatchPos = pos;
- ERowsPerMatch rowsPerMatch = ERowsPerMatch::OneRow;
+ auto rowsPerMatch = std::pair {pos, NYql::NMatchRecognize::ERowsPerMatch::OneRow};
if (matchRecognizeClause.HasBlock6()) {
- std::tie(rowsPerMatchPos, rowsPerMatch) = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1());
- if (ERowsPerMatch::AllRows == rowsPerMatch) {
- //https://st.yandex-team.ru/YQL-16213
- Ctx.Error(pos, TIssuesIds::CORE) << "ALL ROWS PER MATCH is not supported yet";
- return {};
- }
+ rowsPerMatch = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1());
}
const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7();
@@ -126,7 +120,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge
std::pair{partitionsPos, std::move(partitioners)},
std::pair{orderByPos, std::move(sortSpecs)},
std::pair{measuresPos, measures},
- std::pair{rowsPerMatchPos, rowsPerMatch},
+ std::move(rowsPerMatch),
std::move(skipTo),
std::pair{patternPos, std::move(pattern)},
std::pair{subsetPos, std::move(subset)},
@@ -159,7 +153,6 @@ TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern
TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize);
const auto& expr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1());
const auto& name = Id(node.GetRule_an_id3(), *this);
- //TODO https://st.yandex-team.ru/YQL-16186
//Each measure must be a lambda, that accepts 2 args:
// - List<InputTableColumns + _yql_Classifier, _yql_MatchNumber>
// - Struct that maps row pattern variables to ranges in the queue
@@ -174,18 +167,18 @@ TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_
return result;
}
-std::pair<TPosition, ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) {
+std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) {
switch(rowsPerMatchClause.GetAltCase()) {
case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1:
return std::pair {
TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match1().GetToken1()),
- ERowsPerMatch::OneRow
+ NYql::NMatchRecognize::ERowsPerMatch::OneRow
};
case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2:
return std::pair {
TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match2().GetToken1()),
- ERowsPerMatch::AllRows
+ NYql::NMatchRecognize::ERowsPerMatch::AllRows
};
case TRule_row_pattern_rows_per_match::ALT_NOT_SET:
Y_ABORT("You should change implementation according to grammar changes");
@@ -233,13 +226,13 @@ std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> TSqlMatchRecogniz
}
}
-NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){
+NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool outputArg) {
NYql::NMatchRecognize::TRowPatternTerm term;
TPosition pos;
for (const auto& factor: node.GetBlock1()) {
const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1();
NYql::NMatchRecognize::TRowPatternPrimary primary;
- bool output = true;
+ bool output = outputArg;
switch (primaryVar.GetAltCase()) {
case TRule_row_pattern_primary::kAltRowPatternPrimary1:
primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this);
@@ -253,9 +246,8 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer
Y_ENSURE("^" == std::get<0>(primary));
break;
case TRule_row_pattern_primary::kAltRowPatternPrimary4: {
- if (++PatternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) {
- primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1());
- --PatternNestingLevel;
+ if (patternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) {
+ primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1(), patternNestingLevel + 1, output);
} else {
Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
<< "To big nesting level in the pattern";
@@ -265,15 +257,14 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer
}
case TRule_row_pattern_primary::kAltRowPatternPrimary5:
output = false;
- Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
- << "ALL ROWS PER MATCH and {- -} are not supported yet"; //https://st.yandex-team.ru/YQL-16227
+ primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary5().GetRule_row_pattern3(), patternNestingLevel + 1, output);
break;
case TRule_row_pattern_primary::kAltRowPatternPrimary6: {
std::vector<NYql::NMatchRecognize::TRowPatternPrimary> items{ParsePattern(
- primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3())
+ primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3(), patternNestingLevel + 1, output)
};
for (const auto& p: primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetBlock4()) {
- items.push_back(ParsePattern(p.GetRule_row_pattern2()));
+ items.push_back(ParsePattern(p.GetRule_row_pattern2(), patternNestingLevel + 1, output));
}
//Permutations now is a syntactic sugar and converted to all possible alternatives
if (items.size() > NYql::NMatchRecognize::MaxPermutedItems) {
@@ -346,11 +337,11 @@ NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTer
return term;
}
-NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){
+NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel, bool output){
TVector<NYql::NMatchRecognize::TRowPatternTerm> result;
- result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1()));
+ result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1(), patternNestingLevel, output));
for (const auto& term: node.GetBlock2())
- result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2()));
+ result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2(), patternNestingLevel, output));
return result;
}
@@ -364,7 +355,6 @@ TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_patt
TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) {
TVector<TNamedFunction> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())};
for (const auto& d: node.GetBlock2()) {
- //TODO https://st.yandex-team.ru/YQL-16186
//Each define must be a predicate lambda, that accepts 3 args:
// - List<input table rows>
// - A struct that maps row pattern variables to ranges in the queue
diff --git a/yql/essentials/sql/v1/sql_match_recognize.h b/yql/essentials/sql/v1/sql_match_recognize.h
index 6766acc953..219baeaa09 100644
--- a/yql/essentials/sql/v1/sql_match_recognize.h
+++ b/yql/essentials/sql/v1/sql_match_recognize.h
@@ -17,14 +17,12 @@ private:
TVector<TNamedFunction> ParsePartitionBy(const TRule_window_partition_clause& partitionClause);
TNamedFunction ParseOneMeasure(const TRule_row_pattern_measure_definition& node);
TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list& node);
- std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause);
+ std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause);
std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause);
- NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node);
- NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node);
+ NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool output);
+ NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel = 1, bool output = true);
TNamedFunction ParseOneDefinition(const TRule_row_pattern_definition& node);
TVector<TNamedFunction> ParseDefinitions(const TRule_row_pattern_definition_list& node);
-private:
- size_t PatternNestingLevel = 0;
};
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
index 20c5e6ab7b..f591ef0647 100644
--- a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
+++ b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
@@ -183,7 +183,7 @@ FROM Input MATCH_RECOGNIZE(
)
)";
auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(not r.IsOk()); ///https://st.yandex-team.ru/YQL-16213
+ UNIT_ASSERT(r.IsOk());
}
{ //default
const auto stmt = R"(