diff options
author | zverevgeny <zverevgeny@ydb.tech> | 2023-09-13 12:31:08 +0300 |
---|---|---|
committer | zverevgeny <zverevgeny@ydb.tech> | 2023-09-13 12:50:53 +0300 |
commit | 2315ad2aea7e5e74f89eae6eab3a437e97b4c562 (patch) | |
tree | c0c3a34ca31921a4419dc77059a47ad0d4f529af | |
parent | 2bbcf2a0266e5466629010c3a272279075356ad7 (diff) | |
download | ydb-2315ad2aea7e5e74f89eae6eab3a437e97b4c562.tar.gz |
YQL-16186 MATCH_RECOGNIZE parse simple navigation function
-rw-r--r-- | ydb/library/yql/sql/v1/builtin.cpp | 5 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/context.h | 15 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/match_recognize.cpp | 101 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/match_recognize.h | 100 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/node.cpp | 4 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/node.h | 1 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_expression.cpp | 66 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_expression.h | 2 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize.cpp | 28 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize.h | 10 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp | 58 |
11 files changed, 342 insertions, 48 deletions
diff --git a/ydb/library/yql/sql/v1/builtin.cpp b/ydb/library/yql/sql/v1/builtin.cpp index dc67b0a4e44..fc4945e4363 100644 --- a/ydb/library/yql/sql/v1/builtin.cpp +++ b/ydb/library/yql/sql/v1/builtin.cpp @@ -2,6 +2,7 @@ #include "context.h" #include "list_builtin.h" +#include "match_recognize.h" #include <ydb/library/yql/ast/yql_type_string.h> #include <ydb/library/yql/core/yql_expr_type_annotation.h> @@ -3089,6 +3090,10 @@ struct TBuiltinFuncData { // Hopping intervals time functions {"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()}, {"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()}, + + //MatchRecognize navigation functions + {"first", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("FIRST")}, + {"last", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("LAST")}, }; return builtinFuncs; } diff --git a/ydb/library/yql/sql/v1/context.h b/ydb/library/yql/sql/v1/context.h index 8896c1cb1ac..8ec2a4df6a3 100644 --- a/ydb/library/yql/sql/v1/context.h +++ b/ydb/library/yql/sql/v1/context.h @@ -78,6 +78,7 @@ namespace NSQLTranslationV1 { Allow, AsStringLiteral, AsPgType, + MatchRecognize, }; class TContext { @@ -195,6 +196,12 @@ namespace NSQLTranslationV1 { return TopLevelColumnReferenceState; } + TStringBuf GetMatchRecognizeDefineVar() const { + YQL_ENSURE(EColumnRefState::MatchRecognize == ColumnReferenceState, + "DefineVar can only be accessed within processing of MATCH_RECOGNIZE lambdas"); + return MatchRecognizeDefineVar; + } + TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos); void WarnUnusedHints(); @@ -215,6 +222,7 @@ namespace NSQLTranslationV1 { EColumnRefState ColumnReferenceState = EColumnRefState::Deny; EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny; + TString MatchRecognizeDefineVar; TString NoColumnErrorContext = "in current scope"; TVector<TBlocks*> CurrentBlocks; @@ -305,10 +313,11 @@ namespace NSQLTranslationV1 { class TColumnRefScope { public: - TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true) + TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "") : PrevTop(ctx.TopLevelColumnReferenceState) , Prev(ctx.ColumnReferenceState) , PrevErr(ctx.NoColumnErrorContext) + , PrevDefineVar(ctx.MatchRecognizeDefineVar) , Ctx(ctx) { if (isTopLevelExpr) { @@ -316,6 +325,8 @@ namespace NSQLTranslationV1 { } else { Ctx.ColumnReferenceState = state; } + YQL_ENSURE(defineVar.empty() || EColumnRefState::MatchRecognize == state, "Internal logic error"); + ctx.MatchRecognizeDefineVar = defineVar; } void SetNoColumnErrContext(const TString& msg) { @@ -326,11 +337,13 @@ namespace NSQLTranslationV1 { Ctx.TopLevelColumnReferenceState = PrevTop; Ctx.ColumnReferenceState = Prev; std::swap(Ctx.NoColumnErrorContext, PrevErr); + std::swap(Ctx.MatchRecognizeDefineVar, PrevDefineVar); } private: const EColumnRefState PrevTop; const EColumnRefState Prev; TString PrevErr; + TString PrevDefineVar; TContext& Ctx; }; diff --git a/ydb/library/yql/sql/v1/match_recognize.cpp b/ydb/library/yql/sql/v1/match_recognize.cpp index 5906a709ad7..4012bd36f94 100644 --- a/ydb/library/yql/sql/v1/match_recognize.cpp +++ b/ydb/library/yql/sql/v1/match_recognize.cpp @@ -1,22 +1,31 @@ #include "match_recognize.h" #include "source.h" +#include "context.h" namespace NSQLTranslationV1 { +namespace { + +const auto VarDataName = "data"; +const auto VarMatchedVarsName = "vars"; +const auto VarLastRowIndexName = "lri"; + +} //namespace { + class TMatchRecognize: public TAstListNode { public: TMatchRecognize( TPosition pos, ISource* source, const TString& inputTable, - std::pair<TPosition, TVector<TPartitioner>>&& partitioners, + std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, - std::pair<TPosition, TVector<TNamedLambda>>&& measures, + std::pair<TPosition, TVector<TNamedFunction>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, - std::pair<TPosition, TVector<TNamedLambda>>&& definitions + std::pair<TPosition, TVector<TNamedFunction>>&& definitions ): TAstListNode(pos, {BuildAtom(pos, "block")}) { Add(BuildBlockStatements( @@ -38,14 +47,14 @@ private: TPosition pos, ISource* source, const TString& inputTable, - std::pair<TPosition, TVector<TPartitioner>>&& partitioners, + std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, - std::pair<TPosition, TVector<TNamedLambda>>&& measures, + std::pair<TPosition, TVector<TNamedFunction>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, - std::pair<TPosition, TVector<TNamedLambda>>&& definitions + std::pair<TPosition, TVector<TNamedFunction>>&& definitions ) { Y_UNUSED(pos); @@ -66,20 +75,20 @@ private: auto measureNames = Y(); for (const auto& m: measures.second){ - measureNames->Add(BuildQuotedAtom(m.lambda->GetPos(), m.name)); + measureNames->Add(BuildQuotedAtom(m.callable->GetPos(), m.name)); } TNodePtr measuresNode = Y("MatchRecognizeMeasures", inputRowType, patternNode, Q(measureNames)); for (const auto& m: measures.second){ - measuresNode->Add(m.lambda); + measuresNode->Add(BuildLambda(m.callable->GetPos(), Y(VarDataName, VarMatchedVarsName), m.callable)); } auto defineNames = Y(); for (const auto& d: definitions.second) { - defineNames->Add(BuildQuotedAtom(d.lambda->GetPos(), d.name)); + defineNames->Add(BuildQuotedAtom(d.callable->GetPos(), d.name)); } TNodePtr defineNode = Y("MatchRecognizeDefines", inputRowType, patternNode, Q(defineNames)); for (const auto& d: definitions.second) { - defineNode->Add(d.lambda); + defineNode->Add(BuildLambda(d.callable->GetPos(), Y(VarDataName, VarMatchedVarsName, VarLastRowIndexName), d.callable)); } return Q(Y( @@ -143,7 +152,6 @@ private: } }; - TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISource* source){ TNodePtr node = new TMatchRecognize( Pos, @@ -163,4 +171,75 @@ TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISou return node; } +namespace { +const auto DefaultNavigatingFunction = "MatchRecognizeDefaultNavigating"; +} + +bool TMatchRecognizeVarAccessNode::DoInit(TContext& ctx, ISource* src) { + //If referenced var is the var that is currently being defined + //then it's a reference to the last row in a partition + Node = new TMatchRecognizeNavigate(ctx.Pos(), DefaultNavigatingFunction, TVector<TNodePtr>{this}); + return Node->Init(ctx, src); +} + +bool TMatchRecognizeNavigate::DoInit(TContext& ctx, ISource* src) { + Y_UNUSED(src); + if (Args.size() != 1) { + ctx.Error(Pos) << "Exactly one argument is required in MATCH_RECOGNIZE navigation function"; + return false; + } + const auto varColumn = dynamic_cast<TMatchRecognizeVarAccessNode *>(Args[0].Get()); + if (not varColumn) { + ctx.Error(Pos) << "Row pattern navigation operations are applicable to row pattern variable only"; + return false; + } + const auto varData = BuildAtom(ctx.Pos(), VarDataName); + const auto varMatchedVars = BuildAtom(ctx.Pos(), VarMatchedVarsName); + const auto varLastRowIndex = BuildAtom(ctx.Pos(), VarLastRowIndexName); + + const auto matchedRanges = Y("Member", varMatchedVars, Q(varColumn->GetVar())); + TNodePtr navigatedRowIndex; + if (DefaultNavigatingFunction == Name) { + if (not varColumn->IsTheSameVar()) { + ctx.Error(Pos) << "Row pattern navigation function is required"; + } + navigatedRowIndex = varLastRowIndex; + } + else if ("PREV" == Name) { + if (not varColumn->IsTheSameVar()) { + ctx.Error(Pos) << "PREV relative to matched vars is not implemented yet"; + return false; + } + navigatedRowIndex = Y( + "-", + varLastRowIndex, + Y("Uint64", Q("1")) + ); + } else if ("FIRST" == Name) { + navigatedRowIndex = Y( + "Member", + Y("Head", matchedRanges), + Q("From") + ); + } else if ("LAST" == Name) { + navigatedRowIndex = Y( + "Member", + Y("Last", matchedRanges), + Q("To") + ); + } else { + ctx.Error(Pos) << "Internal logic error"; + } + Add("Member"); + Add( + Y( + "Lookup", + Y("ToIndexDict", varData), + navigatedRowIndex + ) + ), + Add(Q(varColumn->GetColumn())); + return true; +} + } // namespace NSQLTranslationV1 diff --git a/ydb/library/yql/sql/v1/match_recognize.h b/ydb/library/yql/sql/v1/match_recognize.h index 101ac432c49..22d4292f205 100644 --- a/ydb/library/yql/sql/v1/match_recognize.h +++ b/ydb/library/yql/sql/v1/match_recognize.h @@ -5,13 +5,8 @@ namespace NSQLTranslationV1 { -struct TPartitioner { - TNodePtr callable; // a callable with one free variable: row - TString name; -}; - -struct TNamedLambda { - TNodePtr lambda; +struct TNamedFunction { + TNodePtr callable; //Callable with some free args TString name; }; @@ -41,14 +36,14 @@ class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> { public: TMatchRecognizeBuilder( TPosition clausePos, - std::pair<TPosition, TVector<TPartitioner>>&& partitioners, + std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, - std::pair<TPosition, TVector<TNamedLambda>>&& measures, + std::pair<TPosition, TVector<TNamedFunction>>&& measures, std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, std::pair<TPosition, TAfterMatchSkipTo>&& skipTo, std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, std::pair<TPosition, TNodePtr>&& subset, - std::pair<TPosition, TVector<TNamedLambda>>&& definitions + std::pair<TPosition, TVector<TNamedFunction>>&& definitions ) : Pos(clausePos) , Partitioners(std::move(partitioners)) @@ -64,16 +59,95 @@ public: TNodePtr Build(TContext& ctx, TString&& inputTable, ISource* source); private: TPosition Pos; - std::pair<TPosition, TVector<TPartitioner>> Partitioners; + std::pair<TPosition, TVector<TNamedFunction>> Partitioners; std::pair<TPosition, TVector<TSortSpecificationPtr>> SortSpecs; - std::pair<TPosition, TVector<TNamedLambda>> Measures; + std::pair<TPosition, TVector<TNamedFunction>> Measures; std::pair<TPosition, ERowsPerMatch> RowsPerMatch; std::pair<TPosition, TAfterMatchSkipTo> SkipTo; std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern; std::pair<TPosition, TNodePtr> Subset; - std::pair<TPosition, TVector<TNamedLambda>> Definitions; + std::pair<TPosition, TVector<TNamedFunction>> Definitions; }; using TMatchRecognizeBuilderPtr=TIntrusivePtr<TMatchRecognizeBuilder> ; +class TMatchRecognizeVarAccessNode: public INode { +public: + TMatchRecognizeVarAccessNode(TPosition pos, const TString& var, const TString& column, bool theSameVar) + : INode(pos) + , Var(var) + , TheSameVar(theSameVar) + , Column(column) + , WithinNavigationFunction(false) + { + } + + TString GetVar() const { + return Var; + } + + bool IsTheSameVar() const { + return TheSameVar; + } + + TString GetColumn() const { + return Column; + } + + void SetWithinNavigationFunction() { + WithinNavigationFunction = true; + } + + bool DoInit(TContext& ctx, ISource* src) override; + + TAstNode* Translate(TContext& ctx) const override { + return Node->Translate(ctx); + } + + TPtr DoClone() const override { + YQL_ENSURE(!Node, "TMatchRecognizeVarAccessNode::Clone: Node must not be initialized"); + auto copy = new TMatchRecognizeVarAccessNode(Pos, Var, Column, TheSameVar); + return copy; + } + +protected: + void DoUpdateState() const override { + YQL_ENSURE(Node); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_VERIFY_DEBUG(Node); + Node->VisitTree(func, visited); + } + +private: + TNodePtr Node; + const TString Var; + const bool TheSameVar; //reference the same var as being defined by this expression; + const TString Column; + bool WithinNavigationFunction; +}; + +class TMatchRecognizeNavigate: public TAstListNode { +public: + TMatchRecognizeNavigate(TPosition pos, const TString& name, const TVector<TNodePtr>& args) + : TAstListNode(pos) + , Name(name) + , Args(args) + { + } + +private: + TNodePtr DoClone() const override { + return new TMatchRecognizeNavigate(GetPos(), Name, Args); + } + + bool DoInit(TContext& ctx, ISource* src) override; + +private: + const TString Name; + const TVector<TNodePtr> Args; +}; + } // namespace NSQLTranslationV1 + diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp index 2af3fa3bad6..6d307219728 100644 --- a/ydb/library/yql/sql/v1/node.cpp +++ b/ydb/library/yql/sql/v1/node.cpp @@ -2184,6 +2184,10 @@ TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isL return new TAccessNode(pos, ids, isLookup); } +TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar) { + return new TMatchRecognizeVarAccessNode(pos, var, column, theSameVar); +} + void WarnIfAliasFromSelectIsUsedInGroupBy(TContext& ctx, const TVector<TNodePtr>& selectTerms, const TVector<TNodePtr>& groupByTerms, const TVector<TNodePtr>& groupByExprTerms) { diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h index f7e60ca9f7e..428a7f9de60 100644 --- a/ydb/library/yql/sql/v1/node.h +++ b/ydb/library/yql/sql/v1/node.h @@ -1157,6 +1157,7 @@ namespace NSQLTranslationV1 { TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source = TString()); TNodePtr BuildColumnOrType(TPosition pos, const TString& column = TString()); TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup); + TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar); TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias); TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName = TString()); TNodePtr BuildLambda(TPosition pos, TNodePtr params, const TVector<TNodePtr>& bodies); diff --git a/ydb/library/yql/sql/v1/sql_expression.cpp b/ydb/library/yql/sql/v1/sql_expression.cpp index d8800ab4854..d0ea13d67bf 100644 --- a/ydb/library/yql/sql/v1/sql_expression.cpp +++ b/ydb/library/yql/sql/v1/sql_expression.cpp @@ -815,6 +815,61 @@ TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) { return result; } +TNodePtr MatchRecognizeVarAccess(TTranslation& ctx, const TString& var, const TRule_an_id_or_type& suffix, bool theSameVar) { + switch (suffix.GetAltCase()) { + case TRule_an_id_or_type::kAltAnIdOrType1: + break; + case TRule_an_id_or_type::kAltAnIdOrType2: + break; + case TRule_an_id_or_type::ALT_NOT_SET: + break; + } + const auto& column = Id( + suffix.GetAlt_an_id_or_type1() + .GetRule_id_or_type1().GetAlt_id_or_type1().GetRule_id1(), + ctx + ); + return BuildMatchRecognizeVarAccess(TPosition{}, var, column, theSameVar); +} + +TNodePtr TSqlExpression::RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TAlt3_TBlock1_TBlock2 block) { + switch (block.GetAltCase()) { + case TRule_unary_subexpr_suffix_TBlock1_TAlt3_TBlock1_TBlock2::kAlt1: + break; + case TRule_unary_subexpr_suffix_TBlock1_TAlt3_TBlock1_TBlock2::kAlt2: + break; + case TRule_unary_subexpr_suffix_TBlock1_TAlt3_TBlock1_TBlock2::kAlt3: + switch (block.GetAlt3().GetRule_an_id_or_type1().GetAltCase()) { + case TRule_an_id_or_type::kAltAnIdOrType1: { + const auto &idOrType = block.GetAlt3().GetRule_an_id_or_type1().GetAlt_an_id_or_type1().GetRule_id_or_type1(); + switch(idOrType.GetAltCase()) { + case TRule_id_or_type::kAltIdOrType1: + return BuildMatchRecognizeVarAccess( + Ctx.Pos(), + alias, + Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this), + Ctx.GetMatchRecognizeDefineVar() == alias + ); + case TRule_id_or_type::kAltIdOrType2: + break; + + case TRule_id_or_type::ALT_NOT_SET: + break; + } + } + case TRule_an_id_or_type::kAltAnIdOrType2: + break; + case TRule_an_id_or_type::ALT_NOT_SET: + break; + } + return MatchRecognizeVarAccess(*this, alias, block.GetAlt3().GetRule_an_id_or_type1(), + Ctx.GetMatchRecognizeDefineVar() == alias); + case TRule_unary_subexpr_suffix_TBlock1_TAlt3_TBlock1_TBlock2::ALT_NOT_SET: + Y_FAIL("You should change implementation according to grammar changes"); + } + return TNodePtr{}; +} + template<typename TUnaryCasualExprRule> TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail) { // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix; @@ -889,7 +944,16 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const break; } case TRule_unary_subexpr_suffix::TBlock1::kAlt3: { - // dot + // In case of MATCH_RECOGNIZE lambdas + // X.Y is treated as Var.Column access + if (isColumnRef && EColumnRefState::MatchRecognize == Ctx.GetColumnReferenceState()) { + if (auto rowPatternVarAccess = RowPatternVarAccess( + name, + b.GetAlt3().GetBlock1().GetBlock2()) + ) { + return rowPatternVarAccess; + } + } break; } case TRule_unary_subexpr_suffix::TBlock1::ALT_NOT_SET: diff --git a/ydb/library/yql/sql/v1/sql_expression.h b/ydb/library/yql/sql/v1/sql_expression.h index a64225cfa4d..baf069c8ed1 100644 --- a/ydb/library/yql/sql/v1/sql_expression.h +++ b/ydb/library/yql/sql/v1/sql_expression.h @@ -108,6 +108,8 @@ private: TNodePtr BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const; + TNodePtr RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TAlt3_TBlock1_TBlock2 block); + struct TCaseBranch { TNodePtr Pred; TNodePtr Value; diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.cpp b/ydb/library/yql/sql/v1/sql_match_recognize.cpp index a0413a6eb95..357c251c5d9 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize.cpp @@ -25,7 +25,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge Ctx.Error(pos, TIssuesIds::CORE) << "Unexpected MATCH_RECOGNIZE"; return {}; } - TVector<TPartitioner> partitioners; + TVector<TNamedFunction> partitioners; TPosition partitionsPos = pos; if (matchRecognizeClause.HasBlock3()) { const auto& partitionClause = matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1(); @@ -45,7 +45,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge } TPosition measuresPos = pos; - TVector<TNamedLambda> measures; + TVector<TNamedFunction> measures; if (matchRecognizeClause.HasBlock5()) { const auto& measuresClause = matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1(); measuresPos = TokenPosition(measuresClause.GetToken1()); @@ -121,7 +121,7 @@ TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Ge } -TVector<TPartitioner> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_window_partition_clause& partitionClause) { +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_window_partition_clause& partitionClause) { TColumnRefScope scope(Ctx, EColumnRefState::Allow); TVector<TNodePtr> partitionExprs; if (!NamedExprList( @@ -129,19 +129,19 @@ TVector<TPartitioner> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_win partitionExprs)) { return {}; } - TVector<TPartitioner> partitioners; + TVector<TNamedFunction> partitioners; for (const auto& p: partitionExprs) { auto label = p->GetLabel(); if (!label && p->GetColumnName()) { label = *p->GetColumnName(); } - partitioners.push_back(TPartitioner{p, label}); + partitioners.push_back(TNamedFunction{p, label}); } return partitioners; } -TNamedLambda TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_measure_definition& node) { - TColumnRefScope scope(Ctx, EColumnRefState::Allow); +TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_measure_definition& node) { + TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize); const auto& expr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1()); const auto& name = Id(node.GetRule_an_id3(), *this); //TODO https://st.yandex-team.ru/YQL-16186 @@ -151,8 +151,8 @@ TNamedLambda TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_m return {expr, name}; } -TVector<TNamedLambda> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list& node) { - TVector<TNamedLambda> result{ ParseOneMeasure(node.GetRule_row_pattern_measure_definition1()) }; +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list& node) { + TVector<TNamedFunction> result{ ParseOneMeasure(node.GetRule_row_pattern_measure_definition1()) }; for (const auto& m: node.GetBlock2()) { result.push_back(ParseOneMeasure(m.GetRule_row_pattern_measure_definition2())); } @@ -315,15 +315,15 @@ NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const return result; } -TNamedLambda TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){ +TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){ const auto& varName = PatternVar(node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1(), *this); - TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize, true, varName); const auto& searchCondition = TSqlExpression(Ctx, Mode).Build(node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1()); - return TNamedLambda{searchCondition, varName}; + return TNamedFunction{searchCondition, varName}; } -TVector<TNamedLambda> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) { - TVector<TNamedLambda> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())}; +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) { + TVector<TNamedFunction> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())}; for (const auto& d: node.GetBlock2()) { //TODO https://st.yandex-team.ru/YQL-16186 //Each define must be a predicate lambda, that accepts 3 args: diff --git a/ydb/library/yql/sql/v1/sql_match_recognize.h b/ydb/library/yql/sql/v1/sql_match_recognize.h index cd32223675b..d8d618920aa 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize.h +++ b/ydb/library/yql/sql/v1/sql_match_recognize.h @@ -14,15 +14,15 @@ public: {} TMatchRecognizeBuilderPtr CreateBuilder(const TRule_row_pattern_recognition_clause& node); private: - TVector<TPartitioner> ParsePartitionBy(const TRule_window_partition_clause& partitionClause); - TNamedLambda ParseOneMeasure(const TRule_row_pattern_measure_definition& node); - TVector<TNamedLambda> ParseMeasures(const TRule_row_pattern_measure_list& node); + TVector<TNamedFunction> ParsePartitionBy(const TRule_window_partition_clause& partitionClause); + TNamedFunction ParseOneMeasure(const TRule_row_pattern_measure_definition& node); + TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list& node); std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause); std::pair<TPosition, TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause); NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node); NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node); - TNamedLambda ParseOneDefinition(const TRule_row_pattern_definition& node); - TVector<TNamedLambda> ParseDefinitions(const TRule_row_pattern_definition_list& node); + TNamedFunction ParseOneDefinition(const TRule_row_pattern_definition& node); + TVector<TNamedFunction> ParseDefinitions(const TRule_row_pattern_definition_list& node); private: size_t PatternNestingLevel = 0; }; diff --git a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp index 2606380b05e..0c7f93d190b 100644 --- a/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_match_recognize_ut.cpp @@ -25,7 +25,6 @@ const NYql::TAstNode* FindMatchRecognizeParam(const NYql::TAstNode* root, TStrin return paramNode->GetChild(2); } - bool IsQuotedListOfSize(const NYql::TAstNode* node, ui32 size) { UNIT_ASSERT(node->IsListOfSize(2)); if (!node->IsListOfSize(2)) @@ -37,6 +36,16 @@ bool IsQuotedListOfSize(const NYql::TAstNode* node, ui32 size) { return node->GetChild(1)->IsListOfSize(size); } +bool IsLambda(const NYql::TAstNode* node, ui32 numberOfArgs) { + if (!node->IsListOfSize(3)) { + return false; + } + if (!node->GetChild(0)->IsAtom() || node->GetChild(0)->GetContent() != "lambda") { + return false; + } + return IsQuotedListOfSize(node->GetChild(1), numberOfArgs); +} + Y_UNIT_TEST_SUITE(MatchRecognize) { auto minValidMatchRecognizeSql = R"( USE plato; @@ -125,7 +134,27 @@ FROM Input MATCH_RECOGNIZE( UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(3)->GetChild(2), 3)); } Y_UNIT_TEST(Measures) { - //TODO https://st.yandex-team.ru/YQL-16186 + auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + MEASURES + Last(Q.dt) as T, + First(Y.key) as Key + PATTERN ( A ) + DEFINE Y as true +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto measures = FindMatchRecognizeParam(r.Root, "measures"); + UNIT_ASSERT_VALUES_EQUAL(6, measures->GetChildrenCount()); + const auto columnNames = measures->GetChild(3); + UNIT_ASSERT(IsQuotedListOfSize(columnNames, 2)); + UNIT_ASSERT_VALUES_EQUAL("T", columnNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Key", columnNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + UNIT_ASSERT(IsLambda(measures->GetChild(4), 2)); + UNIT_ASSERT(IsLambda(measures->GetChild(5), 2)); } Y_UNIT_TEST(RowsPerMatch) { { @@ -594,6 +623,29 @@ FROM Input MATCH_RECOGNIZE( } Y_UNIT_TEST(Defines) { - //TODO https://st.yandex-team.ru/YQL-16186 + auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( Y Q L ) + DEFINE + Y as true, + Q as Q.V = "value", + L as L.V = LAST(Q.T) +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto defines = FindMatchRecognizeParam(r.Root, "define"); + UNIT_ASSERT_VALUES_EQUAL(7, defines->GetChildrenCount()); + const auto varNames = defines->GetChild(3); + UNIT_ASSERT(IsQuotedListOfSize(varNames, 3)); + UNIT_ASSERT_VALUES_EQUAL("Y", varNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Q", varNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("L", varNames->GetChild(1)->GetChild(2)->GetChild(1)->GetContent()); + + UNIT_ASSERT(IsLambda(defines->GetChild(4), 3)); + UNIT_ASSERT(IsLambda(defines->GetChild(5), 3)); + UNIT_ASSERT(IsLambda(defines->GetChild(6), 3)); } } |