diff options
author | zverevgeny <zverevgeny@ydb.tech> | 2023-08-24 18:36:23 +0300 |
---|---|---|
committer | zverevgeny <zverevgeny@ydb.tech> | 2023-08-24 18:52:18 +0300 |
commit | 322c6917156bdea5d883e6c42c0cb6f455cb68be (patch) | |
tree | 1f3c821e25472498813ee5ce4996a069610190c1 | |
parent | 3d40978be0626fb1242c81ca9242c45464375574 (diff) | |
download | ydb-322c6917156bdea5d883e6c42c0cb6f455cb68be.tar.gz |
YQL-16186 parse AFTER MATCH SKIP
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize.cpp | 46 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp | 118 |
2 files changed, 155 insertions, 9 deletions
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp index 2aeee6d412..db48964329 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp @@ -17,6 +17,22 @@ TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecogni return Id(node.GetRule_identifier1(), ctx); } +std::unordered_set<TString> GetAllPatternVars(const TRowPatternPtr& pattern){ + std::unordered_set<TString> result; + for (const auto& t: pattern->Terms) { + for (const auto& f: t) { + if (f.Primary.index() == 0) { + result.insert(std::get<0>(f.Primary)); + } + else { + auto nested = GetAllPatternVars(std::get<1>(f.Primary)); + result.insert(nested.cbegin(), nested.cend()); + } + } + } + return result; +} + } //namespace TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) { @@ -65,11 +81,6 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7(); - std::pair<TPosition, TAfterMatchSkipTo> skipTo { pos, TAfterMatchSkipTo{EAfterMatchSkipTo::NextRow, TString()} }; - if (commonSyntax.HasBlock1()){ - skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3()); - //TODO validate var with the name defined in the pattern https://st.yandex-team.ru/YQL-16186 - } if (commonSyntax.HasBlock2()) { const auto& initialOrSeek = commonSyntax.GetBlock2().GetRule_row_pattern_initial_or_seek1(); @@ -80,6 +91,25 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5()); const auto& patternPos = TokenPosition(commonSyntax.token3()); + //this block is located before pattern block in grammar, + // but depends on it, so it is processed after pattern block + std::pair<TPosition, TAfterMatchSkipTo> skipTo { pos, TAfterMatchSkipTo{EAfterMatchSkipTo::NextRow, TString()} }; + if (commonSyntax.HasBlock1()){ + skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3()); + const auto varRequired = + EAfterMatchSkipTo::ToFirst == skipTo.second.To || + EAfterMatchSkipTo::ToLast == skipTo.second.To || + EAfterMatchSkipTo::To == skipTo.second.To; + if (varRequired) { + const auto& allVars = GetAllPatternVars(pattern); + if (allVars.find(skipTo.second.Var) == allVars.cend()) { + Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH"; + return {}; + } + } + } + + TNodePtr subset; TPosition subsetPos = pos; if (commonSyntax.HasBlock7()) { @@ -180,7 +210,7 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc TokenPosition(skipToClause.GetAlt_row_pattern_skip_to3().GetToken1()), TAfterMatchSkipTo{ EAfterMatchSkipTo::ToFirst, - skipToClause.GetAlt_row_pattern_skip_to3().GetToken1().GetValue() + skipToClause.GetAlt_row_pattern_skip_to3().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() } }; case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4: @@ -188,7 +218,7 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc TokenPosition(skipToClause.GetAlt_row_pattern_skip_to4().GetToken1()), TAfterMatchSkipTo{ EAfterMatchSkipTo::ToLast, - skipToClause.GetAlt_row_pattern_skip_to4().GetToken1().GetValue() + skipToClause.GetAlt_row_pattern_skip_to4().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() } }; case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5: @@ -196,7 +226,7 @@ std::pair<TPosition, TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatc TokenPosition(skipToClause.GetAlt_row_pattern_skip_to5().GetToken1()), TAfterMatchSkipTo{ EAfterMatchSkipTo::To, - skipToClause.GetAlt_row_pattern_skip_to5().GetToken1().GetValue() + skipToClause.GetAlt_row_pattern_skip_to5().GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() } }; case TRule_row_pattern_skip_to::ALT_NOT_SET: diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp index 10d92175d4..f54530f718 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp @@ -130,7 +130,123 @@ FROM Input MATCH_RECOGNIZE( //TODO https://st.yandex-team.ru/YQL-16186 } Y_UNIT_TEST(SkipAfterMatch) { - //TODO https://st.yandex-team.ru/YQL-16186 + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_NextRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_PastLastRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO FIRST Y + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToFirst", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO FIRST T -- unknown pattern var + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO LAST Y + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToLast", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO LAST T -- unknown pattern var + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO Y + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_To", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO T -- unknown pattern var + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } } Y_UNIT_TEST(row_pattern_initial_or_seek) { //TODO https://st.yandex-team.ru/YQL-16186 |