about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author zverevgeny <zverevgeny@ydb.tech> 2023-08-11 13:26:18 +0300
committer zverevgeny <zverevgeny@ydb.tech> 2023-08-11 15:05:14 +0300
commit 95d881de13521f9f5d4310978920044743695017 (patch)
tree 5b9c7b81459adb4115dae675de0b69c6d4fc0296
parent f2dd52a9b24467877c3de53794427a4f05bd60fd (diff)
download ydb-95d881de13521f9f5d4310978920044743695017.tar.gz
YQL-16186 enable parentheses in row pattern
-rw-r--r-- ydb/library/yql/sql/v1/match_recognize.cpp 26
-rw-r--r-- ydb/library/yql/sql/v1/match_recognize.h 27
-rw-r--r-- ydb/library/yql/sql/v1/sql_match_recognize.cpp 36
-rw-r--r-- ydb/library/yql/sql/v1/sql_match_recognize.h 4
-rw-r--r-- ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp 84
5 files changed, 125 insertions, 52 deletions
diff --git a/ydb/library/yql/sql/v1/match_recognize.cpp b/ydb/library/yql/sql/v1/match_recognize.cpp
index cffc5045325..73df93e7964 100644
--- a/ydb/library/yql/sql/v1/match_recognize.cpp
+++ b/ydb/library/yql/sql/v1/match_recognize.cpp
@@ -14,7 +14,7 @@ public:
std::pair<TPosition, TVector<TNamedLambda>>&& measures,
std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, TVector<TRowPatternTerm>>&& pattern,
+ std::pair<TPosition, TRowPatternPtr>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
std::pair<TPosition, TVector<TNamedLambda>>&& definitions
): TCallNode(pos, "block", {BuildBlockStatements(
@@ -41,18 +41,15 @@ private:
std::pair<TPosition, TVector<TNamedLambda>>&& measures,
std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, TVector<TRowPatternTerm>>&& pattern,
+ std::pair<TPosition, TRowPatternPtr>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
std::pair<TPosition, TVector<TNamedLambda>>&& definitions
) {
Y_UNUSED(pos);
auto inputRowType = Y("ListItemType",Y("TypeOf", inputTable));
- TNodePtr patternNode = Y();
- for (const auto& t: pattern.second) {
- patternNode->Add(PatternTerm(pos, t));
- }
- patternNode = Q(patternNode);
+
+ auto patternNode = Pattern(pattern.first, pattern.second);
auto partitionColumns = Y();
for (const auto& p: partitioners.second){
@@ -113,7 +110,9 @@ private:
TPtr PatternFactor(const TPosition& pos, const TRowPatternFactor& factor) {
return BuildTuple(pos, {
- BuildQuotedAtom(pos, factor.Name),
+ factor.Primary.index() == 0 ?
+ BuildQuotedAtom(pos, std::get<0>(factor.Primary)) :
+ Pattern(pos, std::get<1>(factor.Primary)),
BuildQuotedAtom(pos, ToString(factor.QuantityMin)),
BuildQuotedAtom(pos, ToString(factor.QuantityMax)),
BuildQuotedAtom(pos, ToString(factor.Greedy)),
@@ -128,6 +127,15 @@ private:
factors->Add(PatternFactor(pos, f));
return Q(std::move(factors));
}
+
+ TPtr Pattern(const TPosition& pos, const TRowPatternPtr& pattern) {
+ TNodePtr patternNode = Y("MatchRecognizePattern");
+ for (const auto& t: pattern->Terms) {
+ patternNode->Add(PatternTerm(pos, t));
+ }
+ return patternNode;
+ }
+
TPtr DoClone() const final{
return new TMatchRecognize(*this);
}
@@ -153,4 +161,4 @@ TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISou
return node;
}
-} // namespace NSQLTranslationV1
\ No newline at end of file
+} // namespace NSQLTranslationV1
diff --git a/ydb/library/yql/sql/v1/match_recognize.h b/ydb/library/yql/sql/v1/match_recognize.h
index 515916207f0..190849dab1f 100644
--- a/ydb/library/yql/sql/v1/match_recognize.h
+++ b/ydb/library/yql/sql/v1/match_recognize.h
@@ -36,16 +36,25 @@ struct TAfterMatchSkipTo {
TString Var;
};
+struct TRowPattern;
+
+using TRowPatternPtr = std::unique_ptr<TRowPattern>;
+
+using TRowPatternPrimary = std::variant<TString, TRowPatternPtr>;
+
struct TRowPatternFactor{
- TString Name;
- uint64_t QuantityMin; //uint64 literal
- uint64_t QuantityMax; //uint64 literal
- bool Greedy; //bool literal;
- bool Output; //bool literal, include in output with ALL ROW PER MATCH
+ TRowPatternPrimary Primary;
+ uint64_t QuantityMin;
+ uint64_t QuantityMax;
+ bool Greedy;
+ bool Output; //include in output with ALL ROW PER MATCH
};
-using TRowPatternTerm = TVector<TRowPatternFactor>;
-using TRowPattern = TVector<TRowPatternTerm>;
+using TRowPatternTerm = std::vector<TRowPatternFactor>;
+
+struct TRowPattern {
+ std::vector<TRowPatternTerm> Terms;
+};
class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> {
public:
@@ -56,7 +65,7 @@ public:
std::pair<TPosition, TVector<TNamedLambda>>&& measures,
std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
std::pair<TPosition, TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, TRowPattern>&& pattern,
+ std::pair<TPosition, TRowPatternPtr>&& pattern,
std::pair<TPosition, TNodePtr>&& subset,
std::pair<TPosition, TVector<TNamedLambda>>&& definitions
)
@@ -79,7 +88,7 @@ private:
std::pair<TPosition, TVector<TNamedLambda>> Measures;
std::pair<TPosition, ERowsPerMatch> RowsPerMatch;
std::pair<TPosition, TAfterMatchSkipTo> SkipTo;
- std::pair<TPosition, TRowPattern> Pattern;
+ std::pair<TPosition, TRowPatternPtr> Pattern;
std::pair<TPosition, TNodePtr> Subset;
std::pair<TPosition, TVector<TNamedLambda>> Definitions;
};
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp
index dc292935393..a0c805fb17d 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp
+++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp
@@ -75,7 +75,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge
return {};
}
- const auto& pattern = ParsePattern(commonSyntax.GetRule_row_pattern5());
+ auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5());
const auto& patternPos = TokenPosition(commonSyntax.token3());
TNodePtr subset;
@@ -207,24 +207,31 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte
TPosition pos;
for (const auto& factor: node.GetBlock1()) {
const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1();
- TString varName;
+ TRowPatternPrimary primary;
bool output = true;
switch(primaryVar.GetAltCase()){
case TRule_row_pattern_primary::kAltRowPatternPrimary1:
- varName = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this);
+ primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this);
break;
case TRule_row_pattern_primary::kAltRowPatternPrimary2:
- varName = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue();
- Y_ENSURE("$" == varName);
+ primary = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue();
+ Y_ENSURE("$" == std::get<0>(primary));
break;
case TRule_row_pattern_primary::kAltRowPatternPrimary3:
- varName = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue();
- Y_ENSURE("^" == varName);
+ primary = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue();
+ Y_ENSURE("^" == std::get<0>(primary));
break;
- case TRule_row_pattern_primary::kAltRowPatternPrimary4:
- Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
- << "Grouping is not supported yet"; //https://st.yandex-team.ru/YQL-16226
+ case TRule_row_pattern_primary::kAltRowPatternPrimary4: {
+ constexpr size_t MaxNesting = 20; //Limit recursion
+ if (++PatternNestingLevel <= MaxNesting) {
+ primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1());
+ } else {
+ Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
+ << "To big nesting level in the pattern";
+ return TRowPatternTerm{};
+ }
break;
+ }
case TRule_row_pattern_primary::kAltRowPatternPrimary5:
output = false;
Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
@@ -280,16 +287,17 @@ TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_patte
Y_FAIL("You should change implementation according to grammar changes");
}
}
- term.push_back(TRowPatternFactor{varName, quantityMin, quantityMax, greedy, output});
+ term.push_back(TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output});
}
return term;
}
-TVector<TRowPatternTerm> TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){
- TVector<TRowPatternTerm> result{ ParsePatternTerm(node.GetRule_row_pattern_term1()) };
+TRowPatternPtr TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){
+ TVector<TRowPatternTerm> result;
+ result.emplace_back(ParsePatternTerm(node.GetRule_row_pattern_term1()));
for (const auto& term: node.GetBlock2())
result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2()));
- return result;
+ return std::make_unique<TRowPattern>(TRowPattern{std::move(result)});
}
TNamedLambda TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.h b/ydb/library/yql/sql/v1/sql_match_recognize.h
index 9433c5531b4..65a49d2162d 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize.h
+++ b/ydb/library/yql/sql/v1/sql_match_recognize.h
@@ -20,9 +20,11 @@ private:
std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause);
std::pair<TPosition, TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause);
TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node);
- TRowPattern ParsePattern(const TRule_row_pattern& node);
+ TRowPatternPtr ParsePattern(const TRule_row_pattern& node);
TNamedLambda ParseOneDefinition(const TRule_row_pattern_definition& node);
TVector<TNamedLambda> ParseDefinitions(const TRule_row_pattern_definition_list& node);
+private:
+ size_t PatternNestingLevel = 0;
};
} // namespace NSQLTranslationV1
\ No newline at end of file
diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
index 2909bd90ea1..2a1982183e2 100644
--- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
+++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp
@@ -135,7 +135,7 @@ FROM Input MATCH_RECOGNIZE(
//TODO https://st.yandex-team.ru/YQL-16186
}
Y_UNIT_TEST(PatternSimple) {
- auto stmt = R"(
+ const auto stmt = R"(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
@@ -145,45 +145,91 @@ FROM Input MATCH_RECOGNIZE(
)";
const auto& r = MatchRecognizeSqlToYql(stmt);
UNIT_ASSERT(r.IsOk());
- auto pattern = FindMatchRecognizeParam(r.Root, "pattern");
- UNIT_ASSERT(IsQuotedListOfSize(pattern, 1));
- const auto& term = pattern->GetChild(1)->GetChild(0);
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 1);
+ const auto& term = patternCallable->GetChild(1);
UNIT_ASSERT(IsQuotedListOfSize(term, 3));
}
- Y_UNIT_TEST(PatternMedium) {
- auto stmt = R"(
+ Y_UNIT_TEST(PatternMultiTerm) {
+ const auto stmt = R"(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
- PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? ^)
+ PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? I J ^)
DEFINE A as A
)
)";
const auto& r = MatchRecognizeSqlToYql(stmt);
UNIT_ASSERT(r.IsOk());
- auto pattern = FindMatchRecognizeParam(r.Root, "pattern");
- UNIT_ASSERT(IsQuotedListOfSize(pattern, 4));
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 4);
+ const auto& lastTerm = patternCallable->GetChild(4);
+ UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 5));
}
- //TODO add tests for factors, quantifiers and greediness https://st.yandex-team.ru/YQL-16186
-
-
- Y_UNIT_TEST(PatternDieHard) {
- auto stmt = R"(
+ Y_UNIT_TEST(PatternWithParanthesis) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN (
+ A | ($ B)+ C D
+ )
+ DEFINE A as A
+ )
+)";
+ const auto& r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 2);
+ const auto& firstTerm = patternCallable->GetChild(1);
+ UNIT_ASSERT(IsQuotedListOfSize(firstTerm, 1));
+ const auto& lastTerm = patternCallable->GetChild(2);
+ UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3));
+ const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0);
+ UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 5));
+ const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0);
+ UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1);
+ UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT(IsQuotedListOfSize(nestedPattern->GetChild(1), 2));
+ }
+
+ Y_UNIT_TEST(PatternLimietedNesting) {
+ const size_t MaxNesting = 20;
+ for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) {
+ std::string pattern;
+ for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
+ pattern.push_back('(');
+ pattern.push_back('A');
+ for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
+ pattern.push_back(')');
+ const auto stmt = TString(R"(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
- PATTERN (^ S1 S2*? ( {- S3 -} S4 )+ | PERMUTE(S1, S2){1,2} $)
+ PATTERN(
+)") + pattern + R"(
+ )
DEFINE A as A
)
)";
- Y_UNUSED(stmt);
- //TODO implement me
- //UNIT_ASSERT( MatchRecognizeSqlToYql(stmt).IsOk());
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ if (not extraNesting) {
+ UNIT_ASSERT(r.IsOk());
+ } else {
+ UNIT_ASSERT(not r.IsOk());
+ }
+ }
}
- Y_UNIT_TEST(row_pattern_subset_clause) {
+
+ //TODO add tests for factors, quantifiers and greediness https://st.yandex-team.ru/YQL-16186
+
+ Y_UNIT_TEST(row_pattern_subset_clause) {
//TODO https://st.yandex-team.ru/YQL-16186
}