aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author zverevgeny <zverevgeny@ydb.tech> 2023-08-24 18:36:23 +0300
committer zverevgeny <zverevgeny@ydb.tech> 2023-08-24 18:52:18 +0300
commit 322c6917156bdea5d883e6c42c0cb6f455cb68be (patch)
tree 1f3c821e25472498813ee5ce4996a069610190c1
parent 3d40978be0626fb1242c81ca9242c45464375574 (diff)
download ydb-322c6917156bdea5d883e6c42c0cb6f455cb68be.tar.gz
YQL-16186 parse AFTER MATCH SKIP
-rw-r--r-- ydb/library/yql/sql/v1/sql_match_recognize.cpp 46
-rw-r--r-- ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp 118
2 files changed, 155 insertions, 9 deletions
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp
index 2aeee6d412..db48964329 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp
+++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp
@@ -17,6 +17,22 @@ TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecogni
return Id(node.GetRule_identifier1(), ctx);
}
+std::unordered_set<TString> GetAllPatternVars(const TRowPatternPtr& pattern){ // Collects the names of all pattern variables referenced by `pattern`, including those inside nested sub-patterns.
+ std::unordered_set<TString> result;
+ for (const auto& t: pattern->Terms) { // t: one term of the pattern, itself an iterable of factors
+ for (const auto& f: t) { // f: one factor; f.Primary is accessed as a variant via index()/std::get
+ if (f.Primary.index() == 0) { // alternative 0 holds a variable name
+ result.insert(std::get<0>(f.Primary));
+ }
+ else { // any other alternative is read via std::get<1> and treated as a nested pattern
+ auto nested = GetAllPatternVars(std::get<1>(f.Primary)); // recurse into the nested pattern
+ result.insert(nested.cbegin(), nested.cend()); // merge the nested variables into this level's set
+ }
+ }
+ }
+ return result;
+}
+
} //namespace
TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) {
@@ -65,11 +81,6 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge
const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7();
- std::pair<TPosition, TAfterMatchSkipTo> skipTo { pos, TAfterMatchSkipTo{EAfterMatchSkipTo::NextRow, TString()} };
- if (commonSyntax.HasBlock1()){
- skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3());
- //TODO validate var with the name defined in the pattern https://st.yandex-team.ru/YQL-16186
- }
if (commonSyntax.HasBlock2()) {
const auto& initialOrSeek = commonSyntax.GetBlock2().GetRule_row_pattern_initial_or_seek1();
@@ -80,6 +91,25 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge
auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5());
const auto& patternPos = TokenPosition(commonSyntax.token3());
+ // The AFTER MATCH SKIP block precedes the PATTERN block in the grammar,
+ // but validating it depends on the parsed pattern, so it is processed after the pattern block.
+ std::pair<TPosition, TAfterMatchSkipTo> skipTo { pos, TAfterMatchSkipTo{EAfterMatchSkipTo::NextRow, TString()} };
+ if (commonSyntax.HasBlock1()){
+ skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3());
+ const auto varRequired =
+ EAfterMatchSkipTo::ToFirst == skipTo.second.To ||
+ EAfterMatchSkipTo::ToLast == skipTo.second.To ||
+ EAfterMatchSkipTo::To == skipTo.second.To;
+ if (varRequired) {
+ const auto& allVars = GetAllPatternVars(pattern);
+ if (allVars.find(skipTo.second.Var) == allVars.cend()) {
+ Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH";
+ return {};
+ }
+ }
+ }
+
+
TNodePtr subset;
TPosition subsetPos = pos;
if (commonSyntax.HasBlock7()) {
@@ -180,7 +210,7 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc
TokenPosition(skipToClause.GetAlt_row_pattern_skip_to3().GetToken1()),
TAfterMatchSkipTo{
EAfterMatchSkipTo::ToFirst,
- skipToClause.GetAlt_row_pattern_skip_to3().GetToken1().GetValue()
+ skipToClause.GetAlt_row_pattern_skip_to3().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue()
}
};
case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4:
@@ -188,7 +218,7 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc
TokenPosition(skipToClause.GetAlt_row_pattern_skip_to4().GetToken1()),
TAfterMatchSkipTo{
EAfterMatchSkipTo::ToLast,
- skipToClause.GetAlt_row_pattern_skip_to4().GetToken1().GetValue()
+ skipToClause.GetAlt_row_pattern_skip_to4().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue()
}
};
case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5:
@@ -196,7 +226,7 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc
TokenPosition(skipToClause.GetAlt_row_pattern_skip_to5().GetToken1()),
TAfterMatchSkipTo{
EAfterMatchSkipTo::To,
- skipToClause.GetAlt_row_pattern_skip_to5().GetToken1().GetValue()
+ skipToClause.GetAlt_row_pattern_skip_to5().GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue()
}
};
case TRule_row_pattern_skip_to::ALT_NOT_SET:
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
index 10d92175d4..f54530f718 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
+++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
@@ -130,7 +130,123 @@ FROM Input MATCH_RECOGNIZE(
//TODO https://st.yandex-team.ru/YQL-16186
}
Y_UNIT_TEST(SkipAfterMatch) { // Exercises each AFTER MATCH SKIP form: accepted forms are checked via the "skipTo" param, unknown variables must fail.
- //TODO https://st.yandex-team.ru/YQL-16186
+ { // Case: SKIP TO NEXT ROW translates successfully and yields AfterMatchSkip_NextRow.
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO NEXT ROW
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_NextRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ }
+ { // Case: SKIP PAST LAST ROW translates successfully and yields AfterMatchSkip_PastLastRow.
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_PastLastRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ }
+ { // Case: SKIP TO FIRST Y, where Y occurs inside nested groups of the pattern; expects AfterMatchSkip_ToFirst with variable "Y".
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO FIRST Y
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToFirst", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ }
+ { // Case: SKIP TO FIRST T, where T does not appear in the pattern; translation must fail.
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO FIRST T -- unknown pattern var
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+ { // Case: SKIP TO LAST Y with Y present in the pattern; expects AfterMatchSkip_ToLast with variable "Y".
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO LAST Y
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToLast", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ }
+ { // Case: SKIP TO LAST T, where T does not appear in the pattern; translation must fail.
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO LAST T -- unknown pattern var
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+ { // Case: SKIP TO Y (no FIRST/LAST) with Y present in the pattern; expects AfterMatchSkip_To with variable "Y".
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO Y
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_To", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ }
+ { // Case: SKIP TO T, where T does not appear in the pattern; translation must fail.
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO T -- unknown pattern var
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
}
Y_UNIT_TEST(row_pattern_initial_or_seek) {
//TODO https://st.yandex-team.ru/YQL-16186