aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--yql/essentials/core/sql_types/match_recognize.h3
-rw-r--r--yql/essentials/core/type_ann/type_ann_core.cpp1
-rw-r--r--yql/essentials/core/type_ann/type_ann_match_recognize.cpp153
-rw-r--r--yql/essentials/core/type_ann/type_ann_match_recognize.h23
-rw-r--r--yql/essentials/core/yql_match_recognize.h5
-rw-r--r--yql/essentials/core/yql_opt_match_recognize.cpp166
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_match_recognize.cpp6
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.cpp124
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.h207
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.cpp128
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.h130
-rw-r--r--yql/essentials/minikql/comp_nodes/mkql_match_recognize_rows_formatter.cpp8
-rw-r--r--yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp106
-rw-r--r--yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp101
-rw-r--r--yql/essentials/minikql/comp_nodes/ya.make.inc2
-rw-r--r--yql/essentials/minikql/computation/mkql_computation_node_holders.h7
-rwxr-xr-xyql/essentials/mount/lib/yql/aggregate.yqls10
-rw-r--r--yql/essentials/sql/v1/builtin.cpp34
-rw-r--r--yql/essentials/sql/v1/context.h43
-rw-r--r--yql/essentials/sql/v1/match_recognize.cpp530
-rw-r--r--yql/essentials/sql/v1/match_recognize.h145
-rw-r--r--yql/essentials/sql/v1/node.cpp12
-rw-r--r--yql/essentials/sql/v1/node.h3
-rw-r--r--yql/essentials/sql/v1/source.cpp2
-rw-r--r--yql/essentials/sql/v1/sql_expression.cpp45
-rw-r--r--yql/essentials/sql/v1/sql_expression.h2
-rw-r--r--yql/essentials/sql/v1/sql_match_recognize.cpp662
-rw-r--r--yql/essentials/sql/v1/sql_match_recognize.h36
-rw-r--r--yql/essentials/sql/v1/sql_match_recognize_ut.cpp24
-rw-r--r--yql/essentials/sql/v1/sql_ut.cpp39
-rw-r--r--yql/essentials/sql/v1/sql_ut_antlr4.cpp39
-rw-r--r--yql/essentials/tests/sql/minirun/part0/canondata/result.json24
-rw-r--r--yql/essentials/tests/sql/minirun/part1/canondata/result.json6
-rw-r--r--yql/essentials/tests/sql/minirun/part2/canondata/result.json6
-rw-r--r--yql/essentials/tests/sql/minirun/part4/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part5/canondata/result.json12
-rw-r--r--yql/essentials/tests/sql/minirun/part6/canondata/result.json6
-rw-r--r--yql/essentials/tests/sql/minirun/part7/canondata/result.json12
-rw-r--r--yql/essentials/tests/sql/minirun/part8/canondata/result.json12
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json105
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_match_recognize-measures_aggregate_/formatted.sql49
-rw-r--r--yql/essentials/tests/sql/suites/aggr_factory/fail_on_match_recognize_specific.sqlx14
-rw-r--r--yql/essentials/tests/sql/suites/aggregate/fail_on_match_recognize_specific.sqlx14
-rw-r--r--yql/essentials/tests/sql/suites/match_recognize/measures_aggr_factory.sqlx14
-rw-r--r--yql/essentials/tests/sql/suites/match_recognize/measures_aggregate.sql39
45 files changed, 1976 insertions, 1147 deletions
diff --git a/yql/essentials/core/sql_types/match_recognize.h b/yql/essentials/core/sql_types/match_recognize.h
index e142587890..f64fe5e7fa 100644
--- a/yql/essentials/core/sql_types/match_recognize.h
+++ b/yql/essentials/core/sql_types/match_recognize.h
@@ -33,9 +33,6 @@ enum class EOutputColumnSource {
Other,
};
-constexpr size_t MaxPatternNesting = 20; //Limit recursion for patterns
-constexpr size_t MaxPermutedItems = 6;
-
//Mixin columns for calculating measures
enum class EMeasureInputDataSpecialColumns {
Classifier = 0,
diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp
index 4ec574876f..6b87ac4343 100644
--- a/yql/essentials/core/type_ann/type_ann_core.cpp
+++ b/yql/essentials/core/type_ann/type_ann_core.cpp
@@ -12982,6 +12982,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["NextValue"] = &NextValueWrapper;
Functions["MatchRecognize"] = &MatchRecognizeWrapper;
+ Functions["MatchRecognizeMeasuresAggregates"] = &MatchRecognizeMeasuresAggregatesWrapper;
Functions["MatchRecognizeParams"] = &MatchRecognizeParamsWrapper;
Functions["MatchRecognizeMeasures"] = &MatchRecognizeMeasuresWrapper;
Functions["MatchRecognizePattern"] = &MatchRecognizePatternWrapper;
diff --git a/yql/essentials/core/type_ann/type_ann_match_recognize.cpp b/yql/essentials/core/type_ann/type_ann_match_recognize.cpp
index 4a6d61192e..696435cd31 100644
--- a/yql/essentials/core/type_ann/type_ann_match_recognize.cpp
+++ b/yql/essentials/core/type_ann/type_ann_match_recognize.cpp
@@ -1,12 +1,31 @@
#include "type_ann_match_recognize.h"
+
#include <yql/essentials/core/sql_types/match_recognize.h>
#include <yql/essentials/core/yql_match_recognize.h>
namespace NYql::NTypeAnnImpl {
-IGraphTransformer::TStatus
-MatchRecognizeWrapper(const TExprNode::TPtr &input, TExprNode::TPtr &output, TContext &ctx) {
- Y_UNUSED(output);
+namespace {
+
+const TStructExprType* GetMatchedRowsRangesType(const TExprNode::TPtr& patternVars, TContext &ctx) {
+ const auto itemType = ctx.Expr.MakeType<TStructExprType>(TVector{
+ ctx.Expr.MakeType<TItemExprType>("From", ctx.Expr.MakeType<TDataExprType>(EDataSlot::Uint64)),
+ ctx.Expr.MakeType<TItemExprType>("To", ctx.Expr.MakeType<TDataExprType>(EDataSlot::Uint64))
+ });
+
+ TVector<const TItemExprType*> items;
+ for (const auto& var : patternVars->Children()) {
+ items.push_back(ctx.Expr.MakeType<TItemExprType>(
+ var->Content(),
+ ctx.Expr.MakeType<TListExprType>(itemType)
+ ));
+ }
+ return ctx.Expr.MakeType<TStructExprType>(items);
+}
+
+} // anonymous namespace
+
+IGraphTransformer::TStatus MatchRecognizeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TContext& ctx) {
if (!EnsureArgsCount(*input, 5, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
@@ -15,7 +34,7 @@ MatchRecognizeWrapper(const TExprNode::TPtr &input, TExprNode::TPtr &output, TCo
const auto partitionColumns = input->Child(2);
const auto sortTraits = input->Child(3);
const auto params = input->Child(4);
- Y_UNUSED(source, sortTraits);
+ Y_UNUSED(sortTraits);
auto status = ConvertToLambda(partitionKeySelector, ctx.Expr, 1, 1);
if (status.Level != IGraphTransformer::TStatus::Ok) {
return status;
@@ -53,47 +72,89 @@ MatchRecognizeWrapper(const TExprNode::TPtr &input, TExprNode::TPtr &output, TCo
return IGraphTransformer::TStatus::Ok;
}
-IGraphTransformer::TStatus
-MatchRecognizeParamsWrapper(const TExprNode::TPtr &input, TExprNode::TPtr &output, TContext &ctx) {
- Y_UNUSED(output);
- if (!EnsureArgsCount(*input, 5, ctx.Expr)) {
+IGraphTransformer::TStatus MatchRecognizeMeasuresAggregatesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TContext& ctx) {
+ if (!EnsureMinArgsCount(*input, 4, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
- const auto measures = input->Child(0);
- input->SetTypeAnn(measures->GetTypeAnn());
- return IGraphTransformer::TStatus::Ok;
-}
+ const auto inputRowType = input->Child(0);
+ const auto patternVars = input->Child(1);
+ const auto names = input->Child(2);
+ const auto vars = input->Child(3);
+ constexpr size_t FirstLambdaIndex = 4;
-namespace {
+ if (!EnsureType(*inputRowType, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*patternVars, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*names, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*vars, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureArgsCount(*vars, names->ChildrenSize(), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureArgsCount(*input, FirstLambdaIndex + names->ChildrenSize(), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
-const TStructExprType* GetMatchedRowsRangesType(const TExprNode::TPtr& pattern, TContext &ctx) {
- auto vars = GetPatternVars(NYql::NMatchRecognize::ConvertPattern(pattern, ctx.Expr, 0));
TVector<const TItemExprType*> items;
- for (const auto& var: vars) {
- const auto& item = ctx.Expr.MakeType<TStructExprType>(TVector<const TItemExprType*>{
- ctx.Expr.MakeType<TItemExprType>("From", ctx.Expr.MakeType<TDataExprType>(EDataSlot::Uint64)),
- ctx.Expr.MakeType<TItemExprType>("To", ctx.Expr.MakeType<TDataExprType>(EDataSlot::Uint64))
- });
- items.push_back(ctx.Expr.MakeType<TItemExprType>(var, ctx.Expr.MakeType<TListExprType>(item)));
+ for (size_t i = 0; i < names->ChildrenSize(); ++i) {
+ auto lambda = input->Child(FirstLambdaIndex + i);
+ if (const auto varName = vars->Child(i)->Content()) {
+ const auto traits = input->Child(FirstLambdaIndex + i);
+ if (!EnsureTupleMinSize(*traits, 2, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleMaxSize(*traits, 3, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureAtom(traits->Head(), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ lambda = traits->Child(1)->Child(NNodes::TCoAggregationTraits::idx_FinishHandler);
+ }
+ if (auto type = lambda->GetTypeAnn()) {
+ if (type->GetKind() != ETypeAnnotationKind::Optional) {
+ type = ctx.Expr.MakeType<TOptionalExprType>(type);
+ }
+ items.push_back(ctx.Expr.MakeType<TItemExprType>(names->Child(i)->Content(), type));
+ } else {
+ return IGraphTransformer::TStatus::Repeat;
+ }
}
- return ctx.Expr.MakeType<TStructExprType>(items);
+ input->SetTypeAnn(ctx.Expr.MakeType<TStructExprType>(items));
+ return IGraphTransformer::TStatus::Ok;
}
-}//namespace {
+IGraphTransformer::TStatus MatchRecognizeParamsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TContext& ctx) {
+ if (!EnsureArgsCount(*input, 5, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ const auto measures = input->Child(0);
+ input->SetTypeAnn(measures->GetTypeAnn());
+ return IGraphTransformer::TStatus::Ok;
+}
-IGraphTransformer::TStatus
-MatchRecognizeMeasuresWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output,
- TContext &ctx) {
- Y_UNUSED(output);
+IGraphTransformer::TStatus MatchRecognizeMeasuresWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TContext& ctx) {
if (!EnsureMinArgsCount(*input, 3, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
const auto inputRowType = input->Child(0);
- const auto pattern = input->Child(1);
+ const auto patternVars = input->Child(1);
const auto names = input->Child(2);
constexpr size_t FirstLambdaIndex = 3;
- if (!EnsureTupleOfAtoms(*names, ctx.Expr)) {
+ if (!EnsureType(*inputRowType, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*patternVars, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*names, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
@@ -111,7 +172,7 @@ MatchRecognizeMeasuresWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& out
MeasureInputDataSpecialColumnName(EMeasureInputDataSpecialColumns::MatchNumber),
ctx.Expr.MakeType<TDataExprType>(EDataSlot::Uint64)));
auto lambdaInputRowType = ctx.Expr.MakeType<TStructExprType>(lambdaInputRowColumns);
- const auto& matchedRowsRanges = GetMatchedRowsRangesType(pattern, ctx);
+ const auto& matchedRowsRanges = GetMatchedRowsRangesType(patternVars, ctx);
YQL_ENSURE(matchedRowsRanges);
TVector<const TItemExprType*> items;
for (size_t i = 0; i != names->ChildrenSize(); ++i) {
@@ -139,27 +200,27 @@ MatchRecognizeMeasuresWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& out
return IGraphTransformer::TStatus::Ok;
}
-IGraphTransformer::TStatus
-MatchRecognizePatternWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output,
- TContext &ctx) {
- Y_UNUSED(output);
+IGraphTransformer::TStatus MatchRecognizePatternWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TContext& ctx) {
input->SetTypeAnn(ctx.Expr.MakeType<TVoidExprType>());
return IGraphTransformer::TStatus::Ok;
}
-IGraphTransformer::TStatus
-MatchRecognizeDefinesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output,
- TContext &ctx) {
- Y_UNUSED(output);
+IGraphTransformer::TStatus MatchRecognizeDefinesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TContext &ctx) {
if (!EnsureMinArgsCount(*input, 3, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
const auto inputRowType = input->Child(0);
- const auto pattern = input->Child(1);
+ const auto patternVars = input->Child(1);
const auto names = input->Child(2);
- const size_t FirstLambdaIndex = 3;
+ constexpr size_t FirstLambdaIndex = 3;
- if (!EnsureTupleOfAtoms(*names, ctx.Expr)) {
+ if (!EnsureType(*inputRowType, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*patternVars, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureTupleOfAtoms(*names, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
@@ -167,7 +228,7 @@ MatchRecognizeDefinesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& outp
return IGraphTransformer::TStatus::Error;
}
- const auto matchedRowsRanges = GetMatchedRowsRangesType(pattern, ctx);
+ const auto matchedRowsRanges = GetMatchedRowsRangesType(patternVars, ctx);
TVector<const TItemExprType*> items;
for (size_t i = 0; i != names->ChildrenSize(); ++i) {
auto& lambda = input->ChildRef(FirstLambdaIndex + i);
@@ -230,11 +291,9 @@ bool ValidateSettings(const TExprNode::TPtr& settings, TExprContext& ctx) {
return true;
}
-} //namespace
+} // anonymous namespace
-IGraphTransformer::TStatus
-MatchRecognizeCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) {
- Y_UNUSED(output);
+IGraphTransformer::TStatus MatchRecognizeCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr&, TExtContext& ctx) {
if (not ctx.Types.MatchRecognize) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "MATCH_RECOGNIZE is disabled"));
return IGraphTransformer::TStatus::Error;
@@ -253,7 +312,7 @@ MatchRecognizeCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output,
}
if (not ValidateSettings(settings, ctx.Expr)) {
- return IGraphTransformer::TStatus::Error;;
+ return IGraphTransformer::TStatus::Error;
}
if (!EnsureFlowType(*source, ctx.Expr)) {
diff --git a/yql/essentials/core/type_ann/type_ann_match_recognize.h b/yql/essentials/core/type_ann/type_ann_match_recognize.h
index 65a2d4d5b3..d21b7f36b4 100644
--- a/yql/essentials/core/type_ann/type_ann_match_recognize.h
+++ b/yql/essentials/core/type_ann/type_ann_match_recognize.h
@@ -1,19 +1,18 @@
#pragma once
-#include "type_ann_core.h"
#include "type_ann_impl.h"
#include <yql/essentials/ast/yql_expr.h>
-#include <yql/essentials/ast/yql_expr_types.h>
+#include <yql/essentials/core/yql_graph_transformer.h>
+namespace NYql::NTypeAnnImpl {
-namespace NYql {
- namespace NTypeAnnImpl {
- IGraphTransformer::TStatus MatchRecognizeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
- IGraphTransformer::TStatus MatchRecognizeParamsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
- IGraphTransformer::TStatus MatchRecognizeMeasuresWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
- IGraphTransformer::TStatus MatchRecognizePatternWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
- IGraphTransformer::TStatus MatchRecognizeDefinesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
- IGraphTransformer::TStatus MatchRecognizeCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx);
- }
-}
+IGraphTransformer::TStatus MatchRecognizeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus MatchRecognizeMeasuresAggregatesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus MatchRecognizeParamsWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus MatchRecognizeMeasuresWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus MatchRecognizePatternWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus MatchRecognizeDefinesWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus MatchRecognizeCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx);
+
+} // namespace NYql::NTypeAnnImpl
diff --git a/yql/essentials/core/yql_match_recognize.h b/yql/essentials/core/yql_match_recognize.h
index ec38a837bc..6b741f5237 100644
--- a/yql/essentials/core/yql_match_recognize.h
+++ b/yql/essentials/core/yql_match_recognize.h
@@ -4,8 +4,7 @@
namespace NYql::NMatchRecognize {
-inline TRowPattern ConvertPattern(const TExprNode::TPtr& pattern, TExprContext &ctx, size_t nestingLevel = 0) {
- YQL_ENSURE(nestingLevel <= MaxPatternNesting, "To big nesting level in the pattern");
+inline TRowPattern ConvertPattern(const TExprNode::TPtr& pattern, TExprContext &ctx) {
TRowPattern result;
for (const auto& term: pattern->Children()) {
result.push_back(TRowPatternTerm{});
@@ -14,7 +13,7 @@ inline TRowPattern ConvertPattern(const TExprNode::TPtr& pattern, TExprContext &
result.back().push_back(TRowPatternFactor{
factor->Child(0)->IsAtom() ?
TRowPatternPrimary(TString(factor->Child(0)->Content())) :
- ConvertPattern(factor->Child(0), ctx, nestingLevel + 1),
+ ConvertPattern(factor->Child(0), ctx),
FromString<ui64>(factor->Child(1)->Content()),
FromString<ui64>(factor->Child(2)->Content()),
FromString<bool>(factor->Child(3)->Content()),
diff --git a/yql/essentials/core/yql_opt_match_recognize.cpp b/yql/essentials/core/yql_opt_match_recognize.cpp
index f2bef7d7d4..3efae9e42b 100644
--- a/yql/essentials/core/yql_opt_match_recognize.cpp
+++ b/yql/essentials/core/yql_opt_match_recognize.cpp
@@ -9,6 +9,7 @@ namespace NYql {
using namespace NNodes;
namespace {
+
bool IsStreaming(const TExprNode::TPtr& input, const TTypeAnnotationContext& typeAnnCtx) {
if (EMatchRecognizeStreamingMode::Disable == typeAnnCtx.MatchRecognizeStreaming){
return false;
@@ -29,65 +30,147 @@ bool IsStreaming(const TExprNode::TPtr& input, const TTypeAnnotationContext& typ
});
return hasPq;
}
-} //namespace
-
-// returns std::nullopt if all vars could be used
-std::optional<TSet<TStringBuf>> FindUsedVars(const TExprNode::TPtr& params) {
- TSet<TStringBuf> usedVars;
- bool allVarsUsed = false;
-
- const auto createVisitor = [&usedVars, &allVarsUsed](const TExprNode::TPtr& varsArg) {
- return [&varsArg, &usedVars, &allVarsUsed](const TExprNode::TPtr& node) -> bool {
- if (node->IsCallable("Member")) {
- if (node->Child(0) == varsArg) {
- usedVars.insert(node->Child(1)->Content());
- return false;
+
+TExprNode::TPtr ExpandMatchRecognizeMeasuresAggregates(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& /* typeAnnCtx */) {
+ const auto pos = node->Pos();
+ const auto vars = node->Child(3);
+ static constexpr size_t AggregatesLambdasStartPos = 4;
+ static constexpr size_t MeasuresLambdasStartPos = 3;
+
+ return ctx.Builder(pos)
+ .Callable("MatchRecognizeMeasures")
+ .Add(0, node->ChildPtr(0))
+ .Add(1, node->ChildPtr(1))
+ .Add(2, node->ChildPtr(2))
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ for (size_t i = 0; i < vars->ChildrenSize(); ++i) {
+ const auto var = vars->Child(i)->Content();
+ const auto handler = node->ChildPtr(AggregatesLambdasStartPos + i);
+ if (!var) {
+ auto value = handler->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Optional
+ ? ctx.Builder(pos).Callable("Just").Add(0, handler).Seal().Build()
+ : handler;
+ parent.Add(
+ MeasuresLambdasStartPos + i,
+ ctx.Builder(pos)
+ .Lambda()
+ .Param("data")
+ .Param("vars")
+ .Add(0, std::move(value))
+ .Seal()
+ .Build()
+ );
+ continue;
+ }
+ parent.Add(
+ MeasuresLambdasStartPos + i,
+ ctx.Builder(pos)
+ .Lambda()
+ .Param("data")
+ .Param("vars")
+ .Callable(0, "Member")
+ .Callable(0, "Head")
+ .Callable(0, "Aggregate")
+ .Callable(0, "OrderedMap")
+ .Callable(0, "OrderedFlatMap")
+ .Callable(0, "Member")
+ .Arg(0, "vars")
+ .Atom(1, var)
+ .Seal()
+ .Lambda(1)
+ .Param("item")
+ .Callable(0, "ListFromRange")
+ .Callable(0, "Member")
+ .Arg(0, "item")
+ .Atom(1, "From")
+ .Seal()
+ .Callable(1, "+MayWarn")
+ .Callable(0, "Member")
+ .Arg(0, "item")
+ .Atom(1, "To")
+ .Seal()
+ .Callable(1, "Uint64")
+ .Atom(0, "1")
+ .Seal()
+ .Seal()
+ .Seal()
+ .Seal()
+ .Seal()
+ .Lambda(1)
+ .Param("index")
+ .Callable(0, "Unwrap")
+ .Callable(0, "Lookup")
+ .Callable(0, "ToIndexDict")
+ .Arg(0, "data")
+ .Seal()
+ .Arg(1, "index")
+ .Seal()
+ .Seal()
+ .Seal()
+ .Seal()
+ .List(1).Seal()
+ .List(2)
+ .Add(0, handler)
+ .Seal()
+ .List(3).Seal()
+ .Seal()
+ .Seal()
+ .Atom(1, handler->Child(0)->Content())
+ .Seal()
+ .Seal()
+ .Build()
+ );
}
- }
- if (node == varsArg) {
- allVarsUsed = true;
- }
- return true;
- };
- };
+ return parent;
+ })
+ .Seal()
+ .Build();
+}
+
+THashSet<TStringBuf> FindUsedVars(const TExprNode::TPtr& params) {
+ THashSet<TStringBuf> result;
const auto measures = params->Child(0);
- static constexpr size_t measureLambdasStartPos = 3;
- for (size_t pos = measureLambdasStartPos; pos != measures->ChildrenSize(); pos++) {
- const auto lambda = measures->Child(pos);
- const auto lambdaArgs = lambda->Child(0);
- const auto lambdaBody = lambda->ChildPtr(1);
- const auto varsArg = lambdaArgs->ChildPtr(1);
- NYql::VisitExpr(lambdaBody, createVisitor(varsArg));
+ const auto measuresVars = measures->Child(3);
+ for (const auto& var : measuresVars->Children()) {
+ result.insert(var->Content());
}
const auto defines = params->Child(4);
static constexpr size_t defineLambdasStartPos = 3;
- for (size_t pos = defineLambdasStartPos; pos != defines->ChildrenSize(); pos++) {
- const auto lambda = defines->Child(pos);
+ for (auto i = defineLambdasStartPos; i < defines->ChildrenSize(); ++i) {
+ const auto lambda = defines->Child(i);
const auto lambdaArgs = lambda->Child(0);
const auto lambdaBody = lambda->ChildPtr(1);
- const auto varsArg = lambdaArgs->ChildPtr(1);
- NYql::VisitExpr(lambdaBody, createVisitor(varsArg));
+ const auto varsArg = lambdaArgs->Child(1);
+ NYql::VisitExpr(
+ lambdaBody,
+ [varsArg, &result](const TExprNode::TPtr& node) {
+ if (node->IsCallable("Member") && node->Child(0) == varsArg) {
+ result.insert(node->Child(1)->Content());
+ return false;
+ }
+ return true;
+ }
+ );
}
- return allVarsUsed ? std::nullopt : std::make_optional(usedVars);
+ return result;
}
-// usedVars can be std::nullopt if all vars could probably be used
-TExprNode::TPtr MarkUnusedPatternVars(const TExprNode::TPtr& node, TExprContext& ctx, const std::optional<TSet<TStringBuf>> &usedVars, TStringBuf rowsPerMatch) {
+TExprNode::TPtr MarkUnusedPatternVars(const TExprNode::TPtr& node, TExprContext& ctx, const THashSet<TStringBuf>& usedVars, TStringBuf rowsPerMatch) {
const auto pos = node->Pos();
if (node->ChildrenSize() != 0 && node->Child(0)->IsAtom()) {
const auto varName = node->Child(0)->Content();
- const auto output = node->Child(4);
- const auto varUnused = ("RowsPerMatch_AllRows" != rowsPerMatch || !output) && usedVars && !usedVars->contains(varName);
+ const auto output = FromString<bool>(node->Child(4)->Content());
+ const auto varUnused = ("RowsPerMatch_AllRows" != rowsPerMatch || !output) && !usedVars.contains(varName);
return ctx.Builder(pos)
.List()
.Add(0, node->ChildPtr(0))
.Add(1, node->ChildPtr(1))
.Add(2, node->ChildPtr(2))
.Add(3, node->ChildPtr(3))
- .Add(4, output)
+ .Add(4, node->ChildPtr(4))
.Add(5, ctx.NewAtom(pos, ToString(varUnused)))
.Seal()
.Build();
@@ -105,13 +188,15 @@ TExprNode::TPtr MarkUnusedPatternVars(const TExprNode::TPtr& node, TExprContext&
}
}
+} // anonymous namespace
+
TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typeAnnCtx) {
- YQL_ENSURE(node->IsCallable({"MatchRecognize"}));
+ YQL_ENSURE(node->IsCallable("MatchRecognize"));
const auto input = node->Child(0);
const auto partitionKeySelector = node->Child(1);
const auto partitionColumns = node->Child(2);
const auto sortTraits = node->Child(3);
- const auto params = node->ChildPtr(4);
+ const auto params = node->Child(4);
const auto pos = node->Pos();
const bool isStreaming = IsStreaming(input, typeAnnCtx);
@@ -120,6 +205,7 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext&
"Streaming", ctx.NewAtom(pos, ToString(isStreaming)), ctx);
const auto rowsPerMatch = params->Child(1)->Content();
+ auto measures = ExpandMatchRecognizeMeasuresAggregates(params->ChildPtr(0), ctx, typeAnnCtx);
const auto matchRecognize = ctx.Builder(pos)
.Lambda()
.Param("sortedPartition")
@@ -131,7 +217,7 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext&
.Add(1, partitionKeySelector)
.Add(2, partitionColumns)
.Callable(3, params->Content())
- .Add(0, params->ChildPtr(0))
+ .Add(0, std::move(measures))
.Add(1, params->ChildPtr(1))
.Add(2, params->ChildPtr(2))
.Add(3, MarkUnusedPatternVars(params->ChildPtr(3), ctx, FindUsedVars(params), rowsPerMatch))
diff --git a/yql/essentials/minikql/comp_nodes/mkql_match_recognize.cpp b/yql/essentials/minikql/comp_nodes/mkql_match_recognize.cpp
index d59ebee70f..5b0e60afa8 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_match_recognize.cpp
+++ b/yql/essentials/minikql/comp_nodes/mkql_match_recognize.cpp
@@ -48,8 +48,8 @@ public:
{}
bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) {
- Parameters.InputDataArg->SetValue(ctx, ctx.HolderFactory.Create<TListValue<TSparseList>>(Rows));
- Parameters.CurrentRowIndexArg->SetValue(ctx, NUdf::TUnboxedValuePod(Rows.Size()));
+ Parameters.InputDataArg->SetValue(ctx, ctx.HolderFactory.Create<TListValue>(Rows));
+ Parameters.CurrentRowIndexArg->SetValue(ctx, NUdf::TUnboxedValuePod(Rows.LastRowIndex()));
Nfa.ProcessRow(Rows.Append(std::move(row)), ctx);
return HasMatched();
}
@@ -68,7 +68,7 @@ public:
}
Parameters.MatchedVarsArg->SetValue(ctx, ctx.HolderFactory.Create<TMatchedVarsValue<TSparseList::TRange>>(ctx.HolderFactory, match->Vars));
Parameters.MeasureInputDataArg->SetValue(ctx, ctx.HolderFactory.Create<TMeasureInputDataValue>(
- ctx.HolderFactory.Create<TListValue<TSparseList>>(Rows),
+ ctx.HolderFactory.Create<TListValue>(Rows),
Parameters.MeasureInputColumnOrder,
Parameters.MatchedVarsArg->GetValue(ctx),
Parameters.VarNames,
diff --git a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.cpp b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.cpp
new file mode 100644
index 0000000000..ae8a04eb76
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.cpp
@@ -0,0 +1,124 @@
+#include "mkql_match_recognize_list.h"
+
+namespace NKikimr::NMiniKQL::NMatchRecognize {
+
+namespace {
+
+class TIterator : public TTemporaryComputationValue<TIterator> {
+public:
+ TIterator(TMemoryUsageInfo* memUsage, const TSparseList& parent)
+ : TTemporaryComputationValue<TIterator>(memUsage)
+ , Parent(parent)
+ , Current(Parent.Begin())
+ {}
+
+private:
+ bool Skip() final {
+ return ++Current != Parent.End();
+ }
+
+ bool Next(NUdf::TUnboxedValue& value) final {
+ if (!Skip()) {
+ return false;
+ }
+ value = Current->second.Value;
+ return true;
+ }
+
+ bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final {
+ if (!Next(payload)) {
+ return false;
+ }
+ key = NUdf::TUnboxedValuePod(Current->first);
+ return true;
+ }
+
+ const TSparseList& Parent;
+ TSparseList::iterator Current;
+};
+
+class TKeysIterator : public TTemporaryComputationValue<TKeysIterator> {
+public:
+ TKeysIterator(TMemoryUsageInfo* memUsage, const TSparseList& parent)
+ : TTemporaryComputationValue<TKeysIterator>(memUsage)
+ , Parent(parent)
+ , Current(Parent.Begin())
+ {}
+private:
+ bool Skip() final {
+ return ++Current != Parent.End();
+ }
+
+ bool Next(NUdf::TUnboxedValue& key) final {
+ if (!Skip()) {
+ return false;
+ }
+ key = NUdf::TUnboxedValuePod(Current->first);
+ return true;
+ }
+
+ const TSparseList& Parent;
+ TSparseList::iterator Current;
+};
+
+} // anonymous namespace
+
+TListValue::TListValue(TMemoryUsageInfo* memUsage, const TSparseList& list)
+ : TComputationValue<TListValue>(memUsage)
+ , List(list)
+{}
+
+bool TListValue::HasFastListLength() const {
+ return true;
+}
+
+ui64 TListValue::GetListLength() const {
+ return GetDictLength();
+}
+
+ui64 TListValue::GetEstimatedListLength() const {
+ return GetListLength();
+}
+
+NUdf::TUnboxedValue TListValue::GetListIterator() const {
+ return GetPayloadsIterator();
+}
+
+bool TListValue::HasListItems() const {
+ return HasDictItems();
+}
+
+NUdf::IBoxedValuePtr TListValue::ToIndexDictImpl(const NUdf::IValueBuilder& builder) const {
+ Y_UNUSED(builder);
+ return const_cast<TListValue*>(this);
+}
+
+ui64 TListValue::GetDictLength() const {
+ return List.Size();
+}
+
+NUdf::TUnboxedValue TListValue::GetDictIterator() const {
+ return NUdf::TUnboxedValuePod(new TIterator(GetMemInfo(), List));
+}
+
+NUdf::TUnboxedValue TListValue::GetKeysIterator() const {
+ return NUdf::TUnboxedValuePod(new TKeysIterator(GetMemInfo(), List));
+}
+
+NUdf::TUnboxedValue TListValue::GetPayloadsIterator() const {
+ return NUdf::TUnboxedValuePod(new TIterator(GetMemInfo(), List));
+}
+
+bool TListValue::Contains(const NUdf::TUnboxedValuePod& key) const {
+ return List.Contains(key.Get<ui64>());
+}
+
+NUdf::TUnboxedValue TListValue::Lookup(const NUdf::TUnboxedValuePod& key) const {
+ return List.Get(key.Get<ui64>());
+}
+
+bool TListValue::HasDictItems() const {
+ return !List.Empty();
+}
+
+} // namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.h b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.h
index e60f5465ef..c3569cc4be 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.h
+++ b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_list.h
@@ -1,101 +1,15 @@
#pragma once
#include "mkql_match_recognize_save_load.h"
-#include "mkql_match_recognize_version.h"
#include <yql/essentials/minikql/defs.h>
-#include <yql/essentials/minikql/computation/mkql_computation_node_impl.h>
#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
-#include <yql/essentials/minikql/comp_nodes/mkql_saveload.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node_impl.h>
#include <yql/essentials/public/udf/udf_value.h>
#include <unordered_map>
namespace NKikimr::NMiniKQL::NMatchRecognize {
-class TSimpleList {
-public:
- ///Range that includes starting and ending points
- ///Can not be empty
- class TRange {
- public:
- TRange()
- : FromIndex(Max())
- , ToIndex(Max())
- , NfaIndex_(Max())
- {
- }
-
- explicit TRange(ui64 index)
- : FromIndex(index)
- , ToIndex(index)
- , NfaIndex_(Max())
- {
- }
-
- TRange(ui64 from, ui64 to)
- : FromIndex(from)
- , ToIndex(to)
- , NfaIndex_(Max())
- {
- MKQL_ENSURE(FromIndex <= ToIndex, "Internal logic error");
- }
-
- bool IsValid() const {
- return FromIndex != Max<size_t>() && ToIndex != Max<size_t>();
- }
-
- size_t From() const {
- MKQL_ENSURE(IsValid(), "Internal logic error");
- return FromIndex;
- }
-
- size_t To() const {
- MKQL_ENSURE(IsValid(), "Internal logic error");
- return ToIndex;
- }
-
- [[nodiscard]] size_t NfaIndex() const {
- MKQL_ENSURE(IsValid(), "Internal logic error");
- return NfaIndex_;
- }
-
- size_t Size() const {
- MKQL_ENSURE(IsValid(), "Internal logic error");
- return ToIndex - FromIndex + 1;
- }
-
- void Extend() {
- MKQL_ENSURE(IsValid(), "Internal logic error");
- ++ToIndex;
- }
-
- private:
- size_t FromIndex;
- size_t ToIndex;
- size_t NfaIndex_;
- };
-
- TRange Append(NUdf::TUnboxedValue&& value) {
- TRange result(Rows.size());
- Rows.push_back(std::move(value));
- return result;
- }
-
- size_t Size() const {
- return Rows.size();
- }
-
- bool Empty() const {
- return Size() == 0;
- }
-
- NUdf::TUnboxedValue Get(size_t i) const {
- return Rows.at(i);
- }
-private:
- TUnboxedValueVector Rows;
-};
-
///Stores only locked items
///Locks are holds by TRange
///When all locks on an item are released, the item is removed from the list
@@ -108,17 +22,31 @@ class TSparseList {
class TContainer: public TSimpleRefCount<TContainer> {
public:
using TPtr = TIntrusivePtr<TContainer>;
+ //TODO consider to replace hash table with contiguous chunks
+ using TStorage = TMKQLHashMap<size_t, TItem>;
+ using iterator = TStorage::const_iterator;
- void Add(size_t index, NUdf::TUnboxedValue&& value) {
- const auto& [iter, newOne] = Storage.emplace(index, TItem{std::move(value), 1});
- MKQL_ENSURE(newOne, "Internal logic error");
+ [[nodiscard]] iterator Begin() const noexcept {
+ return Storage.begin();
}
- size_t Size() const {
+ [[nodiscard]] iterator End() const noexcept {
+ return Storage.end();
+ }
+
+ [[nodiscard]] size_t Size() const noexcept {
return Storage.size();
}
- NUdf::TUnboxedValue Get(size_t i) const {
+ [[nodiscard]] size_t Empty() const noexcept {
+ return Storage.empty();
+ }
+
+ [[nodiscard]] bool Contains(size_t i) const noexcept {
+ return Storage.find(i) != Storage.cend();
+ }
+
+ [[nodiscard]] NUdf::TUnboxedValue Get(size_t i) const {
if (const auto it = Storage.find(i); it != Storage.cend()) {
return it->second.Value;
} else {
@@ -126,6 +54,11 @@ class TSparseList {
}
}
+ void Add(size_t index, NUdf::TUnboxedValue&& value) {
+ const auto& [iter, newOne] = Storage.emplace(index, TItem{std::move(value), 1});
+ MKQL_ENSURE(newOne, "Internal logic error");
+ }
+
void LockRange(size_t from, size_t to) {
for (auto i = from; i <= to; ++i) {
const auto it = Storage.find(i);
@@ -165,17 +98,8 @@ class TSparseList {
}
private:
- //TODO consider to replace hash table with contiguous chunks
- using TStorage = std::unordered_map<
- size_t,
- TItem,
- std::hash<size_t>,
- std::equal_to<size_t>,
- TMKQLAllocator<std::pair<const size_t, TItem>, EMemorySubPool::Temporary>>;
-
TStorage Storage;
};
- using TContainerPtr = TContainer::TPtr;
public:
///Range that includes starting and ending points
@@ -303,7 +227,7 @@ public:
}
private:
- TRange(TContainerPtr container, size_t index)
+ TRange(TContainer::TPtr container, size_t index)
: Container(container)
, FromIndex(index)
, ToIndex(index)
@@ -329,35 +253,48 @@ public:
NfaIndex_ = Max();
}
- TContainerPtr Container;
+ TContainer::TPtr Container;
size_t FromIndex;
size_t ToIndex;
size_t NfaIndex_;
};
-public:
TRange Append(NUdf::TUnboxedValue&& value) {
const auto index = ListSize++;
Container->Add(index, std::move(value));
return TRange(Container, index);
}
- NUdf::TUnboxedValue Get(size_t i) const {
- return Container->Get(i);
+ using iterator = TContainer::iterator;
+
+ [[nodiscard]] iterator Begin() const noexcept {
+ return Container->Begin();
+ }
+
+ [[nodiscard]] iterator End() const noexcept {
+ return Container->End();
}
///Return total size of sparse list including absent values
- size_t Size() const {
+ size_t LastRowIndex() const noexcept {
return ListSize;
}
///Return number of present values in sparse list
- size_t Filled() const {
+ size_t Size() const noexcept {
return Container->Size();
}
- bool Empty() const {
- return Size() == 0;
+ [[nodiscard]] bool Empty() const noexcept {
+ return Container->Empty();
+ }
+
+ [[nodiscard]] bool Contains(size_t i) const noexcept {
+ return Container->Contains(i);
+ }
+
+ [[nodiscard]] NUdf::TUnboxedValue Get(size_t i) const {
+ return Container->Get(i);
}
void Save(TMrOutputSerializer& serializer) const {
@@ -369,46 +306,32 @@ public:
}
private:
- TContainerPtr Container = MakeIntrusive<TContainer>();
+ TContainer::TPtr Container = MakeIntrusive<TContainer>();
size_t ListSize = 0; //impl: max index ever stored + 1
};
-template<typename L>
-class TListValue: public TComputationValue<TListValue<L>> {
+class TListValue final : public TComputationValue<TListValue> {
public:
- TListValue(TMemoryUsageInfo* memUsage, const L& list)
- : TComputationValue<TListValue<L>>(memUsage)
- , List(list)
- {
- }
-
- //TODO https://st.yandex-team.ru/YQL-16508
- //NUdf::TUnboxedValue GetListIterator() const override;
-
- bool HasFastListLength() const override {
- return !List.Empty();
- }
+ TListValue(TMemoryUsageInfo* memUsage, const TSparseList& list);
- ui64 GetListLength() const override {
- return List.Size();
- }
-
- bool HasListItems() const override {
- return !List.Empty();
- }
+ bool HasFastListLength() const final;
+ ui64 GetListLength() const final;
+ ui64 GetEstimatedListLength() const final;
+ NUdf::TUnboxedValue GetListIterator() const final;
+ bool HasListItems() const final;
- NUdf::IBoxedValuePtr ToIndexDictImpl(const NUdf::IValueBuilder& builder) const override {
- Y_UNUSED(builder);
- return const_cast<TListValue*>(this);
- }
+ NUdf::IBoxedValuePtr ToIndexDictImpl(const NUdf::IValueBuilder& builder) const final;
- NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
- return List.Get(key.Get<ui64>());
- }
+ ui64 GetDictLength() const final;
+ NUdf::TUnboxedValue GetDictIterator() const final;
+ NUdf::TUnboxedValue GetKeysIterator() const final;
+ NUdf::TUnboxedValue GetPayloadsIterator() const final;
+ bool Contains(const NUdf::TUnboxedValuePod& key) const final;
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final;
+ bool HasDictItems() const final;
private:
- L List;
+ TSparseList List;
};
-}//namespace NKikimr::NMiniKQL::NMatchRecognize
-
+} // namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.cpp b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.cpp
new file mode 100644
index 0000000000..9a152dfcec
--- /dev/null
+++ b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.cpp
@@ -0,0 +1,128 @@
+#include "mkql_match_recognize_measure_arg.h"
+
+namespace NKikimr::NMiniKQL::NMatchRecognize {
+
+TRowForMeasureValue::TRowForMeasureValue(
+ TMemoryUsageInfo* memInfo,
+ NUdf::TUnboxedValue inputRow,
+ ui64 rowIndex,
+ const TMeasureInputColumnOrder& columnOrder,
+ const NUdf::TUnboxedValue& matchedVars,
+ const TUnboxedValueVector& varNames,
+ ui64 matchNumber)
+: TComputationValue<TRowForMeasureValue>(memInfo)
+, InputRow(inputRow)
+, RowIndex(rowIndex)
+, ColumnOrder(columnOrder)
+, MatchedVars(matchedVars)
+, VarNames(varNames)
+, MatchNumber(matchNumber)
+{}
+
+NUdf::TUnboxedValue TRowForMeasureValue::GetElement(ui32 index) const {
+ switch(ColumnOrder[index].first) {
+ case NYql::NMatchRecognize::EMeasureInputDataSpecialColumns::Classifier: {
+ auto varIterator = MatchedVars.GetListIterator();
+ MKQL_ENSURE(varIterator, "Internal logic error");
+ NUdf::TUnboxedValue var;
+ size_t varIndex = 0;
+ while(varIterator.Next(var)) {
+ auto rangeIterator = var.GetListIterator();
+ MKQL_ENSURE(varIterator, "Internal logic error");
+ NUdf::TUnboxedValue range;
+ while(rangeIterator.Next(range)) {
+ const auto from = range.GetElement(0).Get<ui64>();
+ const auto to = range.GetElement(1).Get<ui64>();
+ if (RowIndex >= from and RowIndex <= to) {
+ return VarNames[varIndex];
+ }
+ }
+ ++varIndex;
+ }
+ MKQL_ENSURE(MatchedVars.GetListLength() == varIndex, "Internal logic error");
+ return MakeString("");
+ }
+ case NYql::NMatchRecognize::EMeasureInputDataSpecialColumns::MatchNumber:
+ return NUdf::TUnboxedValuePod(MatchNumber);
+ case NYql::NMatchRecognize::EMeasureInputDataSpecialColumns::Last: //Last corresponds to columns from the input table row
+ return InputRow.GetElement(ColumnOrder[index].second);
+ }
+}
+
+TMeasureInputDataValue::TMeasureInputDataValue(
+ TMemoryUsageInfo* memInfo,
+ const NUdf::TUnboxedValue& inputData,
+ const TMeasureInputColumnOrder& columnOrder,
+ const NUdf::TUnboxedValue& matchedVars,
+ const TUnboxedValueVector& varNames,
+ ui64 matchNumber)
+: TComputationValue<TMeasureInputDataValue>(memInfo)
+, InputData(inputData)
+, ColumnOrder(columnOrder)
+, MatchedVars(matchedVars)
+, VarNames(varNames)
+, MatchNumber(matchNumber)
+{}
+
+bool TMeasureInputDataValue::HasFastListLength() const {
+ return true;
+}
+
+ui64 TMeasureInputDataValue::GetListLength() const {
+ return GetDictLength();
+}
+
+ui64 TMeasureInputDataValue::GetEstimatedListLength() const {
+ return GetListLength();
+}
+
+NUdf::TUnboxedValue TMeasureInputDataValue::GetListIterator() const {
+ return GetPayloadsIterator();
+}
+
+bool TMeasureInputDataValue::HasListItems() const {
+ return HasDictItems();
+}
+
+NUdf::IBoxedValuePtr TMeasureInputDataValue::ToIndexDictImpl(const NUdf::IValueBuilder& builder) const {
+ Y_UNUSED(builder);
+ return const_cast<TMeasureInputDataValue*>(this);
+}
+
+ui64 TMeasureInputDataValue::GetDictLength() const {
+ return InputData.GetDictLength();
+}
+
+NUdf::TUnboxedValue TMeasureInputDataValue::GetDictIterator() const {
+ return InputData.GetDictIterator();
+}
+
+NUdf::TUnboxedValue TMeasureInputDataValue::GetKeysIterator() const {
+ return InputData.GetKeysIterator();
+}
+
+NUdf::TUnboxedValue TMeasureInputDataValue::GetPayloadsIterator() const {
+ return InputData.GetPayloadsIterator();
+}
+
+bool TMeasureInputDataValue::Contains(const NUdf::TUnboxedValuePod& key) const {
+ return InputData.Contains(key);
+}
+
+NUdf::TUnboxedValue TMeasureInputDataValue::Lookup(const NUdf::TUnboxedValuePod& key) const {
+ return NUdf::TUnboxedValuePod{new TRowForMeasureValue(
+ GetMemInfo(),
+ InputData.Lookup(key),
+ key.Get<ui64>(),
+ ColumnOrder,
+ MatchedVars,
+ VarNames,
+ MatchNumber
+ )};
+}
+
+bool TMeasureInputDataValue::HasDictItems() const {
+ return InputData.HasDictItems();
+}
+
+} // namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.h b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.h
index e738bbe9fb..db143e9b30 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.h
+++ b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_measure_arg.h
@@ -1,67 +1,27 @@
#pragma once
-#include "mkql_match_recognize_matched_vars.h"
-#include <yql/essentials/minikql/computation/mkql_computation_node_impl.h>
#include <yql/essentials/core/sql_types/match_recognize.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
+#include <yql/essentials/minikql/computation/mkql_computation_node_impl.h>
#include <yql/essentials/minikql/mkql_string_util.h>
namespace NKikimr::NMiniKQL::NMatchRecognize {
-using NYql::NMatchRecognize::EMeasureInputDataSpecialColumns;
-
-using TMeasureInputColumnOrder = std::vector<std::pair<EMeasureInputDataSpecialColumns, size_t>, TMKQLAllocator<std::pair<EMeasureInputDataSpecialColumns, size_t>>>;
+using TMeasureInputColumnOrder = TMKQLVector<std::pair<NYql::NMatchRecognize::EMeasureInputDataSpecialColumns, size_t>>;
-//Input row augmented with lightweight special columns for calculating MEASURE lambdas
-class TRowForMeasureValue: public TComputationValue<TRowForMeasureValue>
-{
+class TRowForMeasureValue final : public TComputationValue<TRowForMeasureValue> {
public:
TRowForMeasureValue(
- TMemoryUsageInfo* memInfo,
- NUdf::TUnboxedValue inputRow,
- ui64 rowIndex,
- const TMeasureInputColumnOrder& columnOrder,
- const NUdf::TUnboxedValue& matchedVars,
- const TUnboxedValueVector& varNames,
- ui64 matchNumber
- )
- : TComputationValue<TRowForMeasureValue>(memInfo)
- , InputRow(inputRow)
- , RowIndex(rowIndex)
- , ColumnOrder(columnOrder)
- , MatchedVars(matchedVars)
- , VarNames(varNames)
- , MatchNumber(matchNumber)
- {}
+ TMemoryUsageInfo* memInfo,
+ NUdf::TUnboxedValue inputRow,
+ ui64 rowIndex,
+ const TMeasureInputColumnOrder& columnOrder,
+ const NUdf::TUnboxedValue& matchedVars,
+ const TUnboxedValueVector& varNames,
+ ui64 matchNumber);
+
+ NUdf::TUnboxedValue GetElement(ui32 index) const final;
- NUdf::TUnboxedValue GetElement(ui32 index) const override {
- switch(ColumnOrder[index].first) {
- case EMeasureInputDataSpecialColumns::Classifier: {
- auto varIterator = MatchedVars.GetListIterator();
- MKQL_ENSURE(varIterator, "Internal logic error");
- NUdf::TUnboxedValue var;
- size_t varIndex = 0;
- while(varIterator.Next(var)) {
- auto rangeIterator = var.GetListIterator();
- MKQL_ENSURE(varIterator, "Internal logic error");
- NUdf::TUnboxedValue range;
- while(rangeIterator.Next(range)) {
- const auto from = range.GetElement(0).Get<ui64>();
- const auto to = range.GetElement(1).Get<ui64>();
- if (RowIndex >= from and RowIndex <= to) {
- return VarNames[varIndex];
- }
- }
- ++varIndex;
- }
- MKQL_ENSURE(MatchedVars.GetListLength() == varIndex, "Internal logic error");
- return MakeString("");
- }
- case EMeasureInputDataSpecialColumns::MatchNumber:
- return NUdf::TUnboxedValuePod(MatchNumber);
- case EMeasureInputDataSpecialColumns::Last: //Last corresponds to columns from the input table row
- return InputRow.GetElement(ColumnOrder[index].second);
- }
- }
private:
const NUdf::TUnboxedValue InputRow;
const ui64 RowIndex;
@@ -71,51 +31,32 @@ private:
ui64 MatchNumber;
};
-class TMeasureInputDataValue: public TComputationValue<TMeasureInputDataValue> {
- using Base = TComputationValue<TMeasureInputDataValue>;
+class TMeasureInputDataValue final : public TComputationValue<TMeasureInputDataValue> {
public:
- TMeasureInputDataValue(TMemoryUsageInfo* memInfo,
- const NUdf::TUnboxedValue& inputData,
- const TMeasureInputColumnOrder& columnOrder,
- const NUdf::TUnboxedValue& matchedVars,
- const TUnboxedValueVector& varNames,
- ui64 matchNumber)
- : Base(memInfo)
- , InputData(inputData)
- , ColumnOrder(columnOrder)
- , MatchedVars(matchedVars)
- , VarNames(varNames)
- , MatchNumber(matchNumber)
- {}
-
- bool HasFastListLength() const override {
- return InputData.HasFastListLength();
- }
+ TMeasureInputDataValue(
+ TMemoryUsageInfo* memInfo,
+ const NUdf::TUnboxedValue& inputData,
+ const TMeasureInputColumnOrder& columnOrder,
+ const NUdf::TUnboxedValue& matchedVars,
+ const TUnboxedValueVector& varNames,
+ ui64 matchNumber);
- ui64 GetListLength() const override {
- return InputData.GetListLength();
- }
+ bool HasFastListLength() const final;
+ ui64 GetListLength() const final;
+ ui64 GetEstimatedListLength() const final;
+ NUdf::TUnboxedValue GetListIterator() const final;
+ bool HasListItems() const final;
- //TODO https://st.yandex-team.ru/YQL-16508
- //NUdf::TUnboxedValue GetListIterator() const override;
+ NUdf::IBoxedValuePtr ToIndexDictImpl(const NUdf::IValueBuilder& builder) const final;
- NUdf::IBoxedValuePtr ToIndexDictImpl(const NUdf::IValueBuilder& builder) const override {
- Y_UNUSED(builder);
- return const_cast<TMeasureInputDataValue*>(this);
- }
+ ui64 GetDictLength() const final;
+ NUdf::TUnboxedValue GetDictIterator() const final;
+ NUdf::TUnboxedValue GetKeysIterator() const final;
+ NUdf::TUnboxedValue GetPayloadsIterator() const final;
+ bool Contains(const NUdf::TUnboxedValuePod& key) const final;
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final;
+ bool HasDictItems() const final;
- NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
- auto inputRow = InputData.Lookup(key);
- return NUdf::TUnboxedValuePod{new TRowForMeasureValue(
- GetMemInfo(),
- inputRow,
- key.Get<ui64>(),
- ColumnOrder,
- MatchedVars,
- VarNames,
- MatchNumber
- )};
- }
private:
const NUdf::TUnboxedValue InputData;
const TMeasureInputColumnOrder& ColumnOrder;
@@ -124,5 +65,4 @@ private:
const ui64 MatchNumber;
};
-}//namespace NKikimr::NMiniKQL::NMatchRecognize
-
+} // namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_rows_formatter.cpp b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_rows_formatter.cpp
index 74b28b5ab9..9178a9fbb2 100644
--- a/yql/essentials/minikql/comp_nodes/mkql_match_recognize_rows_formatter.cpp
+++ b/yql/essentials/minikql/comp_nodes/mkql_match_recognize_rows_formatter.cpp
@@ -63,6 +63,9 @@ public:
const TSparseList& rows,
const NUdf::TUnboxedValue& partitionKey,
const TNfaTransitionGraph& graph) override {
+ if (Max<size_t>() == CurrentRowIndex_) {
+ return NUdf::TUnboxedValue{};
+ }
return GetMatchRow(ctx, rows, partitionKey, graph);
}
@@ -93,6 +96,8 @@ private:
if (auto iter = ToIndexToMatchRangeLookup_.lower_bound(CurrentRowIndex_);
iter == ToIndexToMatchRangeLookup_.end()) {
MKQL_ENSURE(false, "Internal logic error");
+ } else if (CurrentRowIndex_ < iter->second.From()) {
+ ++CurrentRowIndex_;
} else if (auto transition = std::get_if<TMatchedVarTransition>(&graph.Transitions.at(iter->second.NfaIndex()));
!transition) {
MKQL_ENSURE(false, "Internal logic error");
@@ -106,9 +111,10 @@ private:
return NUdf::TUnboxedValue{};
}
const auto result = DoGetMatchRow(ctx, rows, partitionKey, graph);
- ++CurrentRowIndex_;
if (CurrentRowIndex_ == Match_.EndIndex) {
Clear();
+ } else {
+ ++CurrentRowIndex_;
}
return result;
}
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
index 7578cc2ebe..d3be80cfe6 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_list_ut.cpp
@@ -5,73 +5,49 @@
namespace NKikimr::NMiniKQL::NMatchRecognize {
-template<class L>
-void CommonForSimpleAndSparse(const THolderFactory& holderFactory) {
- using TList = L;
- using TRange = typename L::TRange;
- TList list;
- TRange r;
- for (ui64 i = 0; i != 10; ++i) {
- r = list.Append(NUdf::TUnboxedValuePod{i});
- UNIT_ASSERT_VALUES_EQUAL(1, r.Size());
- NUdf::TUnboxedValue v = list.Get(i);
- UNIT_ASSERT_VALUES_EQUAL(i, v.Get<ui64>());
- }
- UNIT_ASSERT_VALUES_EQUAL(10, list.Size());
- {
- auto r2 = list.Append(NUdf::TUnboxedValuePod{10});
- Y_UNUSED(r2);
- r.Extend();
- }
- UNIT_ASSERT_VALUES_EQUAL(11, list.Size());
- {
- const NUdf::TUnboxedValue& v = list.Get(10);
- UNIT_ASSERT_VALUES_EQUAL(10, v.Get<ui64>());
- }
- //Test access via value
- const NUdf::TUnboxedValue& listValue = holderFactory.Create<TListValue<L>>(list);
- UNIT_ASSERT(listValue);
- UNIT_ASSERT(listValue.HasValue());
- UNIT_ASSERT(listValue.HasListItems());
- UNIT_ASSERT(listValue.HasFastListLength());
- UNIT_ASSERT_VALUES_EQUAL(11, listValue.GetListLength());
- TDefaultValueBuilder valueBuilder(holderFactory);
- auto listValueAsDict = NUdf::TBoxedValueAccessor::ToIndexDictImpl(*listValue.AsBoxed(), TDefaultValueBuilder(holderFactory));
- {
- const NUdf::TUnboxedValue &v = NUdf::TBoxedValueAccessor::Lookup(*listValueAsDict, NUdf::TUnboxedValuePod{9});
- UNIT_ASSERT_VALUES_EQUAL(9, v.Get<ui64>());
- }
- {
- const NUdf::TUnboxedValue &v = NUdf::TBoxedValueAccessor::Lookup(*listValueAsDict, NUdf::TUnboxedValuePod{10});
- UNIT_ASSERT_VALUES_EQUAL(10, v.Get<ui64>());
- }
-}
-
Y_UNIT_TEST_SUITE(MatchRecognizeList) {
TMemoryUsageInfo memUsage("MatchRecognizeListTest");
- Y_UNIT_TEST(SimpleListCommon) {
- TScopedAlloc alloc(__LOCATION__);
- THolderFactory holderFactory(alloc.Ref(), memUsage);
- CommonForSimpleAndSparse<TSimpleList>(holderFactory);
- }
Y_UNIT_TEST(SparseListCommon) {
TScopedAlloc alloc(__LOCATION__);
THolderFactory holderFactory(alloc.Ref(), memUsage);
- CommonForSimpleAndSparse<TSparseList>(holderFactory);
- }
- Y_UNIT_TEST(SimpleListSpecific) {
- TScopedAlloc alloc(__LOCATION__);
- THolderFactory holderFactory(alloc.Ref(), memUsage);
- TSimpleList list;
- for (ui64 i = 0; i != 10; ++i) {
- list.Append(NUdf::TUnboxedValuePod{i});
- }
- //All added items are accessible regardless of held ranges(locks)
+ TSparseList list;
+ TSparseList::TRange r;
for (ui64 i = 0; i != 10; ++i) {
+ r = list.Append(NUdf::TUnboxedValuePod{i});
+ UNIT_ASSERT_VALUES_EQUAL(1, r.Size());
NUdf::TUnboxedValue v = list.Get(i);
UNIT_ASSERT_VALUES_EQUAL(i, v.Get<ui64>());
}
+ UNIT_ASSERT_VALUES_EQUAL(10, list.LastRowIndex());
+ {
+ auto r2 = list.Append(NUdf::TUnboxedValuePod{10});
+ Y_UNUSED(r2);
+ r.Extend();
+ }
+ UNIT_ASSERT_VALUES_EQUAL(11, list.LastRowIndex());
+ {
+ const NUdf::TUnboxedValue& v = list.Get(10);
+ UNIT_ASSERT_VALUES_EQUAL(10, v.Get<ui64>());
+ }
+ //Test access via value
+ const NUdf::TUnboxedValue& listValue = holderFactory.Create<TListValue>(list);
+ UNIT_ASSERT(listValue);
+ UNIT_ASSERT(listValue.HasValue());
+ UNIT_ASSERT(listValue.HasListItems());
+ UNIT_ASSERT(listValue.HasFastListLength());
+ UNIT_ASSERT_VALUES_EQUAL(2, listValue.GetListLength());
+ TDefaultValueBuilder valueBuilder(holderFactory);
+ auto listValueAsDict = NUdf::TBoxedValueAccessor::ToIndexDictImpl(*listValue.AsBoxed(), TDefaultValueBuilder(holderFactory));
+ {
+ const NUdf::TUnboxedValue &v = NUdf::TBoxedValueAccessor::Lookup(*listValueAsDict, NUdf::TUnboxedValuePod{9});
+ UNIT_ASSERT_VALUES_EQUAL(9, v.Get<ui64>());
+ }
+ {
+ const NUdf::TUnboxedValue &v = NUdf::TBoxedValueAccessor::Lookup(*listValueAsDict, NUdf::TUnboxedValuePod{10});
+ UNIT_ASSERT_VALUES_EQUAL(10, v.Get<ui64>());
+ }
}
+
Y_UNIT_TEST(SparseListSpecific) {
TScopedAlloc alloc(__LOCATION__);
THolderFactory holderFactory(alloc.Ref(), memUsage);
@@ -81,7 +57,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeList) {
list.Append(NUdf::TUnboxedValuePod{i});
}
//Check no one is stored
- UNIT_ASSERT_VALUES_EQUAL(0, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(0, list.Size());
for (ui64 i = 0; i != 10; ++i) {
NUdf::TUnboxedValue v = list.Get(i);
UNIT_ASSERT(!v);
@@ -92,7 +68,7 @@ Y_UNIT_TEST_SUITE(MatchRecognizeList) {
r = list.Append(NUdf::TUnboxedValuePod{i});
}
//Check that only the last is stored
- UNIT_ASSERT_VALUES_EQUAL(1, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(1, list.Size());
for (ui64 i = 0; i != 19; ++i) {
NUdf::TUnboxedValue v = list.Get(i);
UNIT_ASSERT(!v);
@@ -106,26 +82,26 @@ Y_UNIT_TEST_SUITE(MatchRecognizeList) {
TSparseList::TRange copiedRange{r};
TSparseList::TRange assignedRange{r};
assignedRange = copiedRange;
- UNIT_ASSERT_VALUES_EQUAL(1, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(1, list.Size());
{
NUdf::TUnboxedValue v = list.Get(19);
UNIT_ASSERT_VALUES_EQUAL(19, v.Get<ui64>());
}
r.Release();
- UNIT_ASSERT_VALUES_EQUAL(1, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(1, list.Size());
{
NUdf::TUnboxedValue v = list.Get(19);
UNIT_ASSERT_VALUES_EQUAL(19, v.Get<ui64>());
}
- UNIT_ASSERT_VALUES_EQUAL(1, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(1, list.Size());
copiedRange.Release();
- UNIT_ASSERT_VALUES_EQUAL(1, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(1, list.Size());
{
NUdf::TUnboxedValue v = list.Get(19);
UNIT_ASSERT_VALUES_EQUAL(19, v.Get<ui64>());
}
assignedRange.Release();
- UNIT_ASSERT_VALUES_EQUAL(0, list.Filled());
+ UNIT_ASSERT_VALUES_EQUAL(0, list.Size());
{
NUdf::TUnboxedValue v = list.Get(19);
UNIT_ASSERT(!v);
@@ -133,4 +109,4 @@ Y_UNIT_TEST_SUITE(MatchRecognizeList) {
}
}
-}//namespace NKikimr::NMiniKQL::TMatchRecognize
+} // namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
index b744e3d53b..368dedfa01 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_match_recognize_matched_vars_ut.cpp
@@ -4,6 +4,105 @@
namespace NKikimr::NMiniKQL::NMatchRecognize {
+class TSimpleList {
+public:
+ using iterator = TUnboxedValueVector::const_iterator;
+
+ [[nodiscard]] iterator Begin() const noexcept {
+ return Rows.begin();
+ }
+
+ [[nodiscard]] iterator End() const noexcept {
+ return Rows.end();
+ }
+
+ [[nodiscard]] size_t Size() const noexcept {
+ return Rows.size();
+ }
+
+ [[nodiscard]] bool Empty() const noexcept {
+ return Rows.empty();
+ }
+
+ [[nodiscard]] bool Contains(size_t i) const noexcept {
+ return i < Size();
+ }
+
+ [[nodiscard]] NUdf::TUnboxedValue Get(size_t i) const {
+ return Rows.at(i);
+ }
+
+ ///Range that includes starting and ending points
+ ///Can not be empty
+ class TRange {
+ public:
+ TRange()
+ : FromIndex(Max())
+ , ToIndex(Max())
+ , NfaIndex_(Max())
+ {
+ }
+
+ explicit TRange(ui64 index)
+ : FromIndex(index)
+ , ToIndex(index)
+ , NfaIndex_(Max())
+ {
+ }
+
+ TRange(ui64 from, ui64 to)
+ : FromIndex(from)
+ , ToIndex(to)
+ , NfaIndex_(Max())
+ {
+ MKQL_ENSURE(FromIndex <= ToIndex, "Internal logic error");
+ }
+
+ bool IsValid() const {
+ return FromIndex != Max<size_t>() && ToIndex != Max<size_t>();
+ }
+
+ size_t From() const {
+ MKQL_ENSURE(IsValid(), "Internal logic error");
+ return FromIndex;
+ }
+
+ size_t To() const {
+ MKQL_ENSURE(IsValid(), "Internal logic error");
+ return ToIndex;
+ }
+
+ [[nodiscard]] size_t NfaIndex() const {
+ MKQL_ENSURE(IsValid(), "Internal logic error");
+ return NfaIndex_;
+ }
+
+ size_t Size() const {
+ MKQL_ENSURE(IsValid(), "Internal logic error");
+ return ToIndex - FromIndex + 1;
+ }
+
+ void Extend() {
+ MKQL_ENSURE(IsValid(), "Internal logic error");
+ ++ToIndex;
+ }
+
+ private:
+ size_t FromIndex;
+ size_t ToIndex;
+ size_t NfaIndex_;
+ };
+
+ TRange Append(NUdf::TUnboxedValue&& value) {
+ TRange result(Rows.size());
+ Rows.push_back(std::move(value));
+ return result;
+ }
+
+private:
+ TUnboxedValueVector Rows;
+};
+
Y_UNIT_TEST_SUITE(MatchRecognizeMatchedVarExtend) {
using TRange = TSimpleList::TRange;
using TMatchedVar = TMatchedVar<TRange>;
@@ -230,4 +329,4 @@ Y_UNIT_TEST_SUITE(MatchRecognizeMatchedVarsToValueByRef) {
}
}
}
-}//namespace NKikimr::NMiniKQL::TMatchRecognize
+} // namespace NKikimr::NMiniKQL::NMatchRecognize
diff --git a/yql/essentials/minikql/comp_nodes/ya.make.inc b/yql/essentials/minikql/comp_nodes/ya.make.inc
index fbf5b04108..29656cf128 100644
--- a/yql/essentials/minikql/comp_nodes/ya.make.inc
+++ b/yql/essentials/minikql/comp_nodes/ya.make.inc
@@ -80,6 +80,8 @@ SET(ORIG_SOURCES
mkql_mapnext.cpp
mkql_map_join.cpp
mkql_match_recognize.cpp
+ mkql_match_recognize_list.cpp
+ mkql_match_recognize_measure_arg.cpp
mkql_match_recognize_rows_formatter.cpp
mkql_multihopping.cpp
mkql_multimap.cpp
diff --git a/yql/essentials/minikql/computation/mkql_computation_node_holders.h b/yql/essentials/minikql/computation/mkql_computation_node_holders.h
index f2bb09006d..23219cc5aa 100644
--- a/yql/essentials/minikql/computation/mkql_computation_node_holders.h
+++ b/yql/essentials/minikql/computation/mkql_computation_node_holders.h
@@ -28,6 +28,11 @@ class TMemoryUsageInfo;
const ui32 CodegenArraysFallbackLimit = 1000u;
+template <typename Type, EMemorySubPool MemoryPool = EMemorySubPool::Default>
+using TMKQLVector = std::vector<Type, TMKQLAllocator<Type, MemoryPool>>;
+template<typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, EMemorySubPool MemoryPool = EMemorySubPool::Default>
+using TMKQLHashMap = std::unordered_map<Key, T, Hash, KeyEqual, TMKQLAllocator<std::pair<const Key, T>, MemoryPool>>;
+
using TKeyTypes = std::vector<std::pair<NUdf::EDataSlot, bool>>;
using TUnboxedValueVector = std::vector<NUdf::TUnboxedValue, TMKQLAllocator<NUdf::TUnboxedValue>>;
using TTemporaryUnboxedValueVector = std::vector<NUdf::TUnboxedValue, TMKQLAllocator<NUdf::TUnboxedValue, EMemorySubPool::Temporary>>;
@@ -1074,7 +1079,7 @@ public: //unavailable getters may be eliminated at compile time, but it'd make c
private:
TKeyTypes KeyTypes;
bool IsTuple = false;
-
+
//unsused pointers may be eliminated at compile time, but it'd make code much less readable
NUdf::IEquate::TPtr Equate;
NUdf::IHash::TPtr Hash;
diff --git a/yql/essentials/mount/lib/yql/aggregate.yqls b/yql/essentials/mount/lib/yql/aggregate.yqls
index b2450bf239..e687adbf80 100755
--- a/yql/essentials/mount/lib/yql/aggregate.yqls
+++ b/yql/essentials/mount/lib/yql/aggregate.yqls
@@ -34,6 +34,8 @@
(let bit_and_traits_factory_raw (lambda '(list_type) (Apply simple_traits_factory list_type (lambda '(one two) (BitAnd one two)))))
(let bit_or_traits_factory_raw (lambda '(list_type) (Apply simple_traits_factory list_type (lambda '(one two) (BitOr one two)))))
(let bit_xor_traits_factory_raw (lambda '(list_type) (Apply simple_traits_factory list_type (lambda '(one two) (BitXor one two)))))
+(let first_traits_factory_raw (lambda '(list_type) (Apply simple_traits_factory list_type (lambda '(lhs rhs) (Coalesce rhs lhs)))))
+(let last_traits_factory_raw (lambda '(list_type) (Apply simple_traits_factory list_type (lambda '(lhs rhs) (Coalesce lhs rhs)))))
# list_type:type
# support optional values
@@ -592,7 +594,7 @@
(let max_traits_factory_opt (lambda '(list_type) (Apply simple_traits_factory list_type (lambda '(one two) (AggrMax one two)))))
(let sum_traits_factory_opt (lambda '(list_type) (block '(
(let item_type (Apply remove_optional_type (ListItemType list_type)))
- (return (Apply simple_traits_factory_map list_type
+ (return (Apply simple_traits_factory_map list_type
(lambda '(one two) (AggrAdd one two))
(lambda '(value) (MatchType item_type 'Interval (lambda '() (Apply convert_interval_to_decimal value)) (lambda '() (WidenIntegral value))))
(lambda '(value) (MatchType item_type 'Interval (lambda '() (Apply convert_decimal_to_interval value)) (lambda '() value)))
@@ -601,6 +603,8 @@
(let bit_and_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type bit_and_traits_factory_raw)))
(let bit_or_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type bit_or_traits_factory_raw)))
(let bit_xor_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type bit_xor_traits_factory_raw)))
+(let first_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type first_traits_factory_raw)))
+(let last_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type last_traits_factory_raw)))
(let avg_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type avg_traits_factory_raw)))
(let variance_0_0_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type variance_0_0_traits_factory_raw)))
(let variance_1_0_traits_factory_opt (lambda '(list_type) (Apply optional_traits_factory list_type variance_1_0_traits_factory_raw)))
@@ -674,6 +678,8 @@
(let bit_and_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type extractor bit_and_traits_factory_opt)))
(let bit_or_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type extractor bit_or_traits_factory_opt)))
(let bit_xor_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type extractor bit_xor_traits_factory_opt)))
+(let first_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type extractor first_traits_factory_raw)))
+(let last_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type extractor last_traits_factory_raw)))
(let and_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type (lambda '(v) (SafeCast (Apply extractor v) (DataType 'Bool))) bool_and_traits_factory_opt)))
(let or_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type (lambda '(v) (SafeCast (Apply extractor v) (DataType 'Bool))) bool_or_traits_factory_opt)))
(let xor_traits_factory (lambda '(list_type extractor) (Apply extractor_traits_factory list_type (lambda '(v) (SafeCast (Apply extractor v) (DataType 'Bool))) bool_xor_traits_factory_opt)))
@@ -963,6 +969,8 @@
(export bit_and_traits_factory)
(export bit_or_traits_factory)
(export bit_xor_traits_factory)
+(export first_traits_factory)
+(export last_traits_factory)
(export and_traits_factory)
(export or_traits_factory)
(export xor_traits_factory)
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp
index c9953c6e0d..8e49ba39f2 100644
--- a/yql/essentials/sql/v1/builtin.cpp
+++ b/yql/essentials/sql/v1/builtin.cpp
@@ -3149,10 +3149,6 @@ struct TBuiltinFuncData {
// Hopping intervals time functions
{"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()},
{"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()},
-
- //MatchRecognize navigation functions
- {"first", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("FIRST")},
- {"last", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("LAST")},
};
return builtinFuncs;
}
@@ -3269,6 +3265,10 @@ struct TBuiltinFuncData {
{"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})},
{"lastvalueignorenulls", BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})},
{"nthvalueignorenulls", BuildAggrFuncFactoryCallback("NthValueIgnoreNulls", "nth_value_ignore_nulls_traits_factory", {OverWindow}, NTH_VALUE)},
+
+ // MatchRecognize navigation functions
+ {"first", BuildAggrFuncFactoryCallback("First", "first_traits_factory")},
+ {"last", BuildAggrFuncFactoryCallback("Last", "last_traits_factory")},
};
return aggrFuncs;
}
@@ -3633,7 +3633,17 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec
return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String"));
}
- return (*aggrCallback).second(pos, args, aggMode, true).Release();
+ switch (ctx.GetColumnReferenceState()) {
+ case EColumnRefState::MatchRecognizeMeasures:
+ [[fallthrough]];
+ case EColumnRefState::MatchRecognizeDefine:
+ return new TInvalidBuiltin(pos, "Cannot use aggregation factory inside the MATCH_RECOGNIZE context");
+ default:
+ if ("first" == aggNormalizedName || "last" == aggNormalizedName) {
+ return new TInvalidBuiltin(pos, "Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context");
+ }
+ return (*aggrCallback).second(pos, args, aggMode, true);
+ }
}
if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") {
@@ -3651,7 +3661,19 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec
auto aggrCallback = aggrFuncs.find(normalizedName);
if (aggrCallback != aggrFuncs.end()) {
- return (*aggrCallback).second(pos, args, aggMode, false).Release();
+ switch (ctx.GetColumnReferenceState()) {
+ case EColumnRefState::MatchRecognizeMeasures: {
+ auto result = (*aggrCallback).second(pos, args, aggMode, false);
+ return BuildMatchRecognizeVarAccess(pos, std::move(result));
+ }
+ case EColumnRefState::MatchRecognizeDefine:
+ return BuildMatchRecognizeDefineAggregate(ctx.Pos(), normalizedName, args);
+ default:
+ if ("first" == normalizedName || "last" == normalizedName) {
+ return new TInvalidBuiltin(pos, "Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context");
+ }
+ return (*aggrCallback).second(pos, args, aggMode, false);
+ }
}
if (aggMode == EAggregateMode::Distinct || aggMode == EAggregateMode::OverWindowDistinct) {
return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions");
diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h
index 85a4739eaf..31185c0d77 100644
--- a/yql/essentials/sql/v1/context.h
+++ b/yql/essentials/sql/v1/context.h
@@ -85,7 +85,9 @@ namespace NSQLTranslationV1 {
Allow,
AsStringLiteral,
AsPgType,
- MatchRecognize,
+ MatchRecognizeMeasures,
+ MatchRecognizeDefine,
+ MatchRecognizeDefineAggregate,
};
class TContext {
@@ -204,12 +206,36 @@ namespace NSQLTranslationV1 {
return TopLevelColumnReferenceState;
}
- TStringBuf GetMatchRecognizeDefineVar() const {
- YQL_ENSURE(EColumnRefState::MatchRecognize == ColumnReferenceState,
- "DefineVar can only be accessed within processing of MATCH_RECOGNIZE lambdas");
+ [[nodiscard]] TString GetMatchRecognizeDefineVar() const {
+ YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
+ || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
+ || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
+ "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
return MatchRecognizeDefineVar;
}
+ TString ExtractMatchRecognizeAggrVar() {
+ YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
+ || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
+ || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
+ "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
+ return std::exchange(MatchRecognizeAggrVar, "");
+ }
+
+ [[nodiscard]] bool SetMatchRecognizeAggrVar(TString var) {
+ YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
+ || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
+ || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
+ "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
+ if (MatchRecognizeAggrVar.empty()) {
+ MatchRecognizeAggrVar = std::move(var);
+ } else if (MatchRecognizeAggrVar != var) {
+ Error() << "Illegal use of aggregates or navigation operators in MATCH_RECOGNIZE";
+ return false;
+ }
+ return true;
+ }
+
TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos);
void WarnUnusedHints();
@@ -231,6 +257,7 @@ namespace NSQLTranslationV1 {
EColumnRefState ColumnReferenceState = EColumnRefState::Deny;
EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny;
TString MatchRecognizeDefineVar;
+ TString MatchRecognizeAggrVar;
TString NoColumnErrorContext = "in current scope";
TVector<TBlocks*> CurrentBlocks;
@@ -347,7 +374,13 @@ namespace NSQLTranslationV1 {
} else {
Ctx.ColumnReferenceState = state;
}
- YQL_ENSURE(defineVar.empty() || EColumnRefState::MatchRecognize == state, "Internal logic error");
+ YQL_ENSURE(
+ defineVar.empty()
+ || EColumnRefState::MatchRecognizeMeasures == state
+ || EColumnRefState::MatchRecognizeDefine == state
+ || EColumnRefState::MatchRecognizeDefineAggregate == state,
+ "Internal logic error"
+ );
ctx.MatchRecognizeDefineVar = defineVar;
}
diff --git a/yql/essentials/sql/v1/match_recognize.cpp b/yql/essentials/sql/v1/match_recognize.cpp
index 84a20ae273..be2f508e87 100644
--- a/yql/essentials/sql/v1/match_recognize.cpp
+++ b/yql/essentials/sql/v1/match_recognize.cpp
@@ -2,111 +2,302 @@
#include "source.h"
#include "context.h"
+#include <util/generic/overloaded.h>
+
namespace NSQLTranslationV1 {
namespace {
-const auto VarDataName = "data";
-const auto VarMatchedVarsName = "vars";
-const auto VarLastRowIndexName = "lri";
+constexpr auto VarDataName = "data";
+constexpr auto VarMatchedVarsName = "vars";
+constexpr auto VarLastRowIndexName = "lri";
+
+class TMatchRecognizeColumnAccessNode final : public TAstListNode {
+public:
+ TMatchRecognizeColumnAccessNode(TPosition pos, TString var, TString column)
+ : TAstListNode(pos)
+ , Var(std::move(var))
+ , Column(std::move(column)) {
+ }
+
+ const TString* GetColumnName() const override {
+ return std::addressof(Column);
+ }
+
+ bool DoInit(TContext& ctx, ISource* /* src */) override {
+ switch (ctx.GetColumnReferenceState()) {
+ case EColumnRefState::MatchRecognizeMeasures:
+ if (!ctx.SetMatchRecognizeAggrVar(Var)) {
+ return false;
+ }
+ Add(
+ "Member",
+ BuildAtom(Pos, "row"),
+ Q(Column)
+ );
+ break;
+ case EColumnRefState::MatchRecognizeDefine:
+ if (ctx.GetMatchRecognizeDefineVar() != Var) {
+ ctx.Error() << "Row pattern navigation function is required";
+ return false;
+ }
+ BuildLookup(VarLastRowIndexName);
+ break;
+ case EColumnRefState::MatchRecognizeDefineAggregate:
+ if (!ctx.SetMatchRecognizeAggrVar(Var)) {
+ return false;
+ }
+ BuildLookup("index");
+ break;
+ default:
+ Y_ABORT("Unexpected column reference state");
+ }
+ return true;
+ }
+
+ TNodePtr DoClone() const override {
+ return new TMatchRecognizeColumnAccessNode(Pos, Var, Column);
+ }
+
+private:
+ void BuildLookup(TString varKeyName) {
+ Add(
+ "Member",
+ Y(
+ "Lookup",
+ Y(
+ "ToIndexDict",
+ BuildAtom(Pos, VarDataName)
+ ),
+ BuildAtom(Pos, std::move(varKeyName))
+ ),
+ Q(Column)
+ );
+ }
+
+private:
+ TString Var;
+ TString Column;
+};
+
+class TMatchRecognizeDefineAggregate final : public TAstListNode {
+public:
+ TMatchRecognizeDefineAggregate(TPosition pos, TString name, TVector<TNodePtr> args)
+ : TAstListNode(pos)
+ , Name(std::move(name))
+ , Args(std::move(args)) {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ Y_DEBUG_ABORT_UNLESS(ctx.GetColumnReferenceState() == EColumnRefState::MatchRecognizeDefine);
+ TColumnRefScope scope(ctx, EColumnRefState::MatchRecognizeDefineAggregate, false, ctx.GetMatchRecognizeDefineVar());
+ if (Args.size() != 1) {
+ ctx.Error() << "Exactly one argument is required in MATCH_RECOGNIZE navigation function";
+ return false;
+ }
+ const auto arg = Args[0];
+ if (!arg->Init(ctx, src)) {
+ return false;
+ }
+
+ const auto body = [&]() -> TNodePtr {
+ if ("first" == Name) {
+ return Y("Member", Y("Head", "item"), Q("From"));
+ } else if ("last" == Name) {
+ return Y("Member", Y("Last", "item"), Q("To"));
+ } else {
+ ctx.Error() << "Unknown row pattern navigation function: " << Name;
+ return {};
+ }
+ }();
+ if (!body) {
+ return false;
+ }
+ Add("Apply", BuildLambda(Pos, Y("index"), arg), body);
+ return true;
+ }
-} //namespace {
+ TNodePtr DoClone() const override {
+ return new TMatchRecognizeDefineAggregate(Pos, Name, Args);
+ }
-class TMatchRecognize: public TAstListNode {
+private:
+ TString Name;
+ TVector<TNodePtr> Args;
+};
+
+class TMatchRecognizeVarAccessNode final : public INode {
+public:
+ TMatchRecognizeVarAccessNode(TPosition pos, TNodePtr arg)
+ : INode(pos)
+ , Arg(std::move(arg)) {
+ }
+
+ [[nodiscard]] const TString& GetVar() const noexcept {
+ return Var;
+ }
+
+ TAggregationPtr GetAggregation() const override {
+ return Arg->GetAggregation();
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!Arg->Init(ctx, src)) {
+ return false;
+ }
+ Var = ctx.ExtractMatchRecognizeAggrVar();
+ Expr = [&]() -> TNodePtr {
+ switch (ctx.GetColumnReferenceState()) {
+ case EColumnRefState::MatchRecognizeMeasures:
+ return Arg;
+ case EColumnRefState::MatchRecognizeDefine:
+ return Y(
+ "Apply",
+ BuildLambda(Pos, Y("item"), Arg),
+ Y(
+ "Member",
+ BuildAtom(ctx.Pos(), VarMatchedVarsName),
+ Q(Var)
+ )
+ );
+ default:
+ Y_ABORT("Unexpected column reference state");
+ }
+ }();
+ return Expr->Init(ctx, src);
+ }
+
+ TNodePtr DoClone() const override {
+ return new TMatchRecognizeVarAccessNode(Pos, Arg);
+ }
+
+ TAstNode* Translate(TContext& ctx) const override {
+ return Expr->Translate(ctx);
+ }
+
+private:
+ TString Var;
+ TNodePtr Arg;
+ TNodePtr Expr;
+};
+
+class TMatchRecognize final : public TAstListNode {
public:
TMatchRecognize(
- TPosition pos,
- ISource* source,
- const TString& inputTable,
- std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
- std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
- std::pair<TPosition, TVector<TNamedFunction>>&& measures,
- std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch,
- std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
- std::pair<TPosition, TNodePtr>&& subset,
- std::pair<TPosition, TVector<TNamedFunction>>&& definitions
- ): TAstListNode(pos, {BuildAtom(pos, "block")})
- {
- Add(BuildBlockStatements(
- pos,
- source,
- inputTable,
- std::move(partitioners),
- std::move(sortSpecs),
- std::move(measures),
- std::move(rowsPerMatch),
- std::move(skipTo),
- std::move(pattern),
- std::move(subset),
- std::move(definitions)
- ));
+ TPosition pos,
+ TString label,
+ TNodePtr partitionKeySelector,
+ TNodePtr partitionColumns,
+ TVector<TSortSpecificationPtr> sortSpecs,
+ TVector<TNamedFunction> measures,
+ TNodePtr rowsPerMatch,
+ TNodePtr skipTo,
+ TNodePtr pattern,
+ TNodePtr patternVars,
+ TNodePtr subset,
+ TVector<TNamedFunction> definitions)
+ : TAstListNode(pos)
+ , Label(std::move(label))
+ , PartitionKeySelector(std::move(partitionKeySelector))
+ , PartitionColumns(std::move(partitionColumns))
+ , SortSpecs(std::move(sortSpecs))
+ , Measures(std::move(measures))
+ , RowsPerMatch(std::move(rowsPerMatch))
+ , SkipTo(std::move(skipTo))
+ , Pattern(std::move(pattern))
+ , PatternVars(std::move(patternVars))
+ , Subset(std::move(subset))
+ , Definitions(std::move(definitions)) {
}
+
private:
- TMatchRecognize(const TMatchRecognize& other)
- : TAstListNode(other.Pos)
- {
- Nodes = CloneContainer(other.Nodes);
- }
-
- TNodePtr BuildBlockStatements(
- TPosition pos,
- ISource* source,
- const TString& inputTable,
- std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
- std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
- std::pair<TPosition, TVector<TNamedFunction>>&& measures,
- std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch,
- std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
- std::pair<TPosition, TNodePtr>&& subset,
- std::pair<TPosition, TVector<TNamedFunction>>&& definitions
- ) {
- Y_UNUSED(pos);
-
- auto inputRowType = Y("ListItemType",Y("TypeOf", inputTable));
-
- auto patternNode = Pattern(pattern.first, pattern.second);
-
- auto partitionColumns = Y();
- for (const auto& p: partitioners.second){
- partitionColumns->Add(BuildQuotedAtom(p.callable->GetPos(), p.name));
- }
- partitionColumns = Q(partitionColumns);
- auto partitionKeySelector = Y();
- for (const auto& p: partitioners.second){
- partitionKeySelector->Add(p.callable);
- }
- partitionKeySelector = BuildLambda(partitioners.first, Y("row"), Q(partitionKeySelector));
+ bool DoInit(TContext& ctx, ISource* src) override {
+ auto inputRowType = Y("ListItemType", Y("TypeOf", Label));
+
+ if (!PartitionKeySelector->Init(ctx, src)) {
+ return false;
+ }
+ if (!PartitionColumns->Init(ctx, src)) {
+ return false;
+ }
+
+ const auto sortTraits = SortSpecs.empty() ? Y("Void") : src->BuildSortSpec(SortSpecs, Label, true, false);
+ if (!sortTraits->Init(ctx, src)) {
+ return false;
+ }
auto measureNames = Y();
- for (const auto& m: measures.second){
- measureNames->Add(BuildQuotedAtom(m.callable->GetPos(), m.name));
+ for (const auto& m: Measures) {
+ measureNames->Add(BuildQuotedAtom(m.Callable->GetPos(), m.Name));
}
- TNodePtr measuresNode = Y("MatchRecognizeMeasures", inputRowType, patternNode, Q(measureNames));
- for (const auto& m: measures.second){
- measuresNode->Add(BuildLambda(m.callable->GetPos(), Y(VarDataName, VarMatchedVarsName), m.callable));
+ auto measureVars = Y();
+ for (const auto& m: Measures) {
+ TColumnRefScope scope(ctx, EColumnRefState::MatchRecognizeMeasures);
+ if (!m.Callable->Init(ctx, src)) {
+ return false;
+ }
+ const auto varAccess = dynamic_cast<TMatchRecognizeVarAccessNode*>(m.Callable.Get());
+ auto var = varAccess ? varAccess->GetVar() : "";
+ measureVars->Add(BuildQuotedAtom(m.Callable->GetPos(), std::move(var)));
}
- auto defineNames = Y();
- for (const auto& d: definitions.second) {
- defineNames->Add(BuildQuotedAtom(d.callable->GetPos(), d.name));
+ auto measuresNode = Y("MatchRecognizeMeasuresAggregates", inputRowType, Q(PatternVars), Q(measureNames), Q(measureVars));
+ for (const auto& m: Measures) {
+ auto aggr = m.Callable->GetAggregation();
+ if (!aggr) {
+ // TODO(YQL-16508): support aggregations inside expressions
+ // ctx.Error(m.Callable->GetPos()) << "Cannot use aggregations inside expression";
+ // return false;
+ measuresNode->Add(m.Callable);
+ } else {
+ const auto [traits, result] = aggr->AggregationTraits(Y("TypeOf", Label), false, false, false, ctx);
+ if (!result) {
+ return false;
+ }
+ measuresNode->Add(traits);
+ }
+ }
+
+ if (!RowsPerMatch->Init(ctx, src)) {
+ return false;
}
- TNodePtr defineNode = Y("MatchRecognizeDefines", inputRowType, patternNode, Q(defineNames));
- for (const auto& d: definitions.second) {
- defineNode->Add(BuildLambda(d.callable->GetPos(), Y(VarDataName, VarMatchedVarsName, VarLastRowIndexName), d.callable));
+ if (!SkipTo->Init(ctx, src)) {
+ return false;
+ }
+
+ if (!Pattern->Init(ctx, src)) {
+ return false;
+ }
+
+ if (!PatternVars->Init(ctx, src)) {
+ return false;
+ }
+
+ auto defineNames = Y();
+ for (const auto& d: Definitions) {
+ defineNames->Add(BuildQuotedAtom(d.Callable->GetPos(), d.Name));
+ }
+ auto defineNode = Y("MatchRecognizeDefines", inputRowType, Q(PatternVars), Q(defineNames));
+ for (const auto& d: Definitions) {
+ TColumnRefScope scope(ctx, EColumnRefState::MatchRecognizeDefine, true, d.Name);
+ if (!d.Callable->Init(ctx, src)) {
+ return false;
+ }
+ defineNode->Add(BuildLambda(d.Callable->GetPos(), Y(VarDataName, VarMatchedVarsName, VarLastRowIndexName), d.Callable));
}
- return Q(Y(
- Y("let", "input", inputTable),
- Y("let", "partitionKeySelector", partitionKeySelector),
- Y("let", "partitionColumns", partitionColumns),
- Y("let", "sortTraits", sortSpecs.second.empty()? Y("Void") : source->BuildSortSpec(sortSpecs.second, inputTable, true, false)),
+ Add(
+ "block",
+ Q(Y(
+ Y("let", "input", Label),
+ Y("let", "partitionKeySelector", PartitionKeySelector),
+ Y("let", "partitionColumns", PartitionColumns),
+ Y("let", "sortTraits", sortTraits),
Y("let", "measures", measuresNode),
- Y("let", "rowsPerMatch", BuildQuotedAtom(rowsPerMatch.first, "RowsPerMatch_" + ToString(rowsPerMatch.second))),
- Y("let", "skipTo", BuildTuple(skipTo.first, {Q("AfterMatchSkip_" + ToString(skipTo.second.To)), Q(ToString(skipTo.second.Var))})),
- Y("let", "pattern", patternNode),
- Y("let", "subset", subset.second ? subset.second : Q("")),
+ Y("let", "rowsPerMatch", RowsPerMatch),
+ Y("let", "skipTo", SkipTo),
+ Y("let", "pattern", Pattern),
+ Y("let", "subset", Subset ? Subset : Q("")),
Y("let", "define", defineNode),
Y("let", "res", Y("MatchRecognize",
"input",
@@ -122,133 +313,76 @@ private:
)
)),
Y("return", "res")
- ));
- }
-
- TPtr PatternFactor(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternFactor& factor) {
- return BuildTuple(pos, {
- factor.Primary.index() == 0 ?
- BuildQuotedAtom(pos, std::get<0>(factor.Primary)) :
- Pattern(pos, std::get<1>(factor.Primary)),
- BuildQuotedAtom(pos, ToString(factor.QuantityMin)),
- BuildQuotedAtom(pos, ToString(factor.QuantityMax)),
- BuildQuotedAtom(pos, ToString(factor.Greedy)),
- BuildQuotedAtom(pos, ToString(factor.Output)),
- BuildQuotedAtom(pos, ToString(factor.Unused))
- });
- }
-
-
- TPtr PatternTerm(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternTerm& term) {
- auto factors = Y();
- for (const auto& f: term)
- factors->Add(PatternFactor(pos, f));
- return Q(std::move(factors));
+ ))
+ );
+ return true;
}
- TPtr Pattern(const TPosition& pos, const NYql::NMatchRecognize::TRowPattern& pattern) {
- TNodePtr patternNode = Y("MatchRecognizePattern");
- for (const auto& t: pattern) {
- patternNode->Add(PatternTerm(pos, t));
- }
- return patternNode;
+ TNodePtr DoClone() const override {
+ return new TMatchRecognize(
+ Pos,
+ Label,
+ PartitionKeySelector,
+ PartitionColumns,
+ SortSpecs,
+ Measures,
+ RowsPerMatch,
+ SkipTo,
+ Pattern,
+ PatternVars,
+ Subset,
+ Definitions
+ );
}
- TPtr DoClone() const final{
- return new TMatchRecognize(*this);
- }
+private:
+ TString Label;
+ TNodePtr PartitionKeySelector;
+ TNodePtr PartitionColumns;
+ TVector<TSortSpecificationPtr> SortSpecs;
+ TVector<TNamedFunction> Measures;
+ TNodePtr RowsPerMatch;
+ TNodePtr SkipTo;
+ TNodePtr Pattern;
+ TNodePtr PatternVars;
+ TNodePtr Subset;
+ TVector<TNamedFunction> Definitions;
};
-TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISource* source){
+} // anonymous namespace
+
+TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString label, ISource* src) {
TNodePtr node = new TMatchRecognize(
- Pos,
- source,
- std::move(inputTable),
- std::move(Partitioners),
- std::move(SortSpecs),
- std::move(Measures),
- std::move(RowsPerMatch),
- std::move(SkipTo),
- std::move(Pattern),
- std::move(Subset),
- std::move(Definitions)
+ Pos,
+ std::move(label),
+ std::move(PartitionKeySelector),
+ std::move(PartitionColumns),
+ std::move(SortSpecs),
+ std::move(Measures),
+ std::move(RowsPerMatch),
+ std::move(SkipTo),
+ std::move(Pattern),
+ std::move(PatternVars),
+ std::move(Subset),
+ std::move(Definitions)
);
- if (!node->Init(ctx, source))
- return nullptr;
+ if (!node->Init(ctx, src)) {
+ return {};
+ }
return node;
}
-namespace {
-const auto DefaultNavigatingFunction = "MatchRecognizeDefaultNavigating";
+TNodePtr BuildMatchRecognizeColumnAccess(TPosition pos, TString var, TString column) {
+ return MakeIntrusive<TMatchRecognizeColumnAccessNode>(pos, std::move(var), std::move(column));
}
-bool TMatchRecognizeVarAccessNode::DoInit(TContext& ctx, ISource* src) {
- //If referenced var is the var that is currently being defined
- //then it's a reference to the last row in a partition
- Node = new TMatchRecognizeNavigate(ctx.Pos(), DefaultNavigatingFunction, TVector<TNodePtr>{this->Clone()});
- return Node->Init(ctx, src);
+TNodePtr BuildMatchRecognizeDefineAggregate(TPosition pos, TString name, TVector<TNodePtr> args) {
+ const auto result = MakeIntrusive<TMatchRecognizeDefineAggregate>(pos, std::move(name), std::move(args));
+ return BuildMatchRecognizeVarAccess(pos, std::move(result));
}
-bool TMatchRecognizeNavigate::DoInit(TContext& ctx, ISource* src) {
- Y_UNUSED(src);
- if (Args.size() != 1) {
- ctx.Error(Pos) << "Exactly one argument is required in MATCH_RECOGNIZE navigation function";
- return false;
- }
- const auto varColumn = dynamic_cast<TMatchRecognizeVarAccessNode *>(Args[0].Get());
- if (not varColumn) {
- ctx.Error(Pos) << "Row pattern navigation operations are applicable to row pattern variable only";
- return false;
- }
- const auto varData = BuildAtom(ctx.Pos(), VarDataName);
- const auto varMatchedVars = BuildAtom(ctx.Pos(), VarMatchedVarsName);
- const auto varLastRowIndex = BuildAtom(ctx.Pos(), VarLastRowIndexName);
-
- const auto matchedRanges = Y("Member", varMatchedVars, Q(varColumn->GetVar()));
- TNodePtr navigatedRowIndex;
- if (DefaultNavigatingFunction == Name) {
- if (not varColumn->IsTheSameVar()) {
- ctx.Error(Pos) << "Row pattern navigation function is required";
- return false;
- }
- navigatedRowIndex = varLastRowIndex;
- }
- else if ("PREV" == Name) {
- if (not varColumn->IsTheSameVar()) {
- ctx.Error(Pos) << "PREV relative to matched vars is not implemented yet";
- return false;
- }
- navigatedRowIndex = Y(
- "-",
- varLastRowIndex,
- Y("Uint64", Q("1"))
- );
- } else if ("FIRST" == Name) {
- navigatedRowIndex = Y(
- "Member",
- Y("Head", matchedRanges),
- Q("From")
- );
- } else if ("LAST" == Name) {
- navigatedRowIndex = Y(
- "Member",
- Y("Last", matchedRanges),
- Q("To")
- );
- } else {
- ctx.Error(Pos) << "Internal logic error";
- return false;
- }
- Add("Member");
- Add(
- Y(
- "Lookup",
- Y("ToIndexDict", varData),
- navigatedRowIndex
- )
- ),
- Add(Q(varColumn->GetColumn()));
- return true;
+TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, TNodePtr extractor) {
+ return MakeIntrusive<TMatchRecognizeVarAccessNode>(pos, std::move(extractor));
}
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/match_recognize.h b/yql/essentials/sql/v1/match_recognize.h
index 4b0e98b9b7..818bd6e600 100644
--- a/yql/essentials/sql/v1/match_recognize.h
+++ b/yql/essentials/sql/v1/match_recognize.h
@@ -1,125 +1,64 @@
#pragma once
+
#include "node.h"
+
#include <yql/essentials/core/sql_types/match_recognize.h>
#include <util/generic/ptr.h>
namespace NSQLTranslationV1 {
struct TNamedFunction {
- TNodePtr callable; //Callable with some free args
- TString name;
+ TNodePtr Callable;
+ TString Name;
};
class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> {
public:
TMatchRecognizeBuilder(
- TPosition clausePos,
- std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
- std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
- std::pair<TPosition, TVector<TNamedFunction>>&& measures,
- std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch>&& rowsPerMatch,
- std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
- std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
- std::pair<TPosition, TNodePtr>&& subset,
- std::pair<TPosition, TVector<TNamedFunction>>&& definitions
- )
- : Pos(clausePos)
- , Partitioners(std::move(partitioners))
- , SortSpecs(std::move(sortSpecs))
- , Measures(std::move(measures))
- , RowsPerMatch(std::move(rowsPerMatch))
- , SkipTo(std::move(skipTo))
- , Pattern(std::move(pattern))
- , Subset(std::move(subset))
- , Definitions(definitions)
-
+ TPosition pos,
+ TNodePtr partitionKeySelector,
+ TNodePtr partitionColumns,
+ TVector<TSortSpecificationPtr> sortSpecs,
+ TVector<TNamedFunction> measures,
+ TNodePtr rowsPerMatch,
+ TNodePtr skipTo,
+ TNodePtr pattern,
+ TNodePtr patternVars,
+ TNodePtr subset,
+ TVector<TNamedFunction> definitions)
+ : Pos(pos)
+ , PartitionKeySelector(std::move(partitionKeySelector))
+ , PartitionColumns(std::move(partitionColumns))
+ , SortSpecs(std::move(sortSpecs))
+ , Measures(std::move(measures))
+ , RowsPerMatch(std::move(rowsPerMatch))
+ , SkipTo(std::move(skipTo))
+ , Pattern(std::move(pattern))
+ , PatternVars(std::move(patternVars))
+ , Subset(std::move(subset))
+ , Definitions(std::move(definitions))
{}
- TNodePtr Build(TContext& ctx, TString&& inputTable, ISource* source);
-private:
- TPosition Pos;
- std::pair<TPosition, TVector<TNamedFunction>> Partitioners;
- std::pair<TPosition, TVector<TSortSpecificationPtr>> SortSpecs;
- std::pair<TPosition, TVector<TNamedFunction>> Measures;
- std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> RowsPerMatch;
- std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> SkipTo;
- std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern;
- std::pair<TPosition, TNodePtr> Subset;
- std::pair<TPosition, TVector<TNamedFunction>> Definitions;
-};
-
-using TMatchRecognizeBuilderPtr=TIntrusivePtr<TMatchRecognizeBuilder> ;
-
-class TMatchRecognizeVarAccessNode: public INode {
-public:
- TMatchRecognizeVarAccessNode(TPosition pos, const TString& var, const TString& column, bool theSameVar)
- : INode(pos)
- , Var(var)
- , TheSameVar(theSameVar)
- , Column(column)
- {
- }
-
- TString GetVar() const {
- return Var;
- }
-
- bool IsTheSameVar() const {
- return TheSameVar;
- }
-
- TString GetColumn() const {
- return Column;
- }
- bool DoInit(TContext& ctx, ISource* src) override;
-
- TAstNode* Translate(TContext& ctx) const override {
- return Node->Translate(ctx);
- }
-
- TPtr DoClone() const override {
- YQL_ENSURE(!Node, "TMatchRecognizeVarAccessNode::Clone: Node must not be initialized");
- auto copy = new TMatchRecognizeVarAccessNode(Pos, Var, Column, TheSameVar);
- return copy;
- }
-
-protected:
- void DoUpdateState() const override {
- YQL_ENSURE(Node);
- }
-
- void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
- Y_DEBUG_ABORT_UNLESS(Node);
- Node->VisitTree(func, visited);
- }
+ TNodePtr Build(TContext& ctx, TString label, ISource* source);
private:
- TNodePtr Node;
- const TString Var;
- const bool TheSameVar; //reference the same var as being defined by this expression;
- const TString Column;
+ TPosition Pos;
+ TNodePtr PartitionKeySelector;
+ TNodePtr PartitionColumns;
+ TVector<TSortSpecificationPtr> SortSpecs;
+ TVector<TNamedFunction> Measures;
+ TNodePtr RowsPerMatch;
+ TNodePtr SkipTo;
+ TNodePtr Pattern;
+ TNodePtr PatternVars;
+ TNodePtr Subset;
+ TVector<TNamedFunction> Definitions;
};
-class TMatchRecognizeNavigate: public TAstListNode {
-public:
- TMatchRecognizeNavigate(TPosition pos, const TString& name, const TVector<TNodePtr>& args)
- : TAstListNode(pos)
- , Name(name)
- , Args(args)
- {
- }
-
-private:
- TNodePtr DoClone() const override {
- return new TMatchRecognizeNavigate(GetPos(), Name, CloneContainer(Args));
- }
-
- bool DoInit(TContext& ctx, ISource* src) override;
+using TMatchRecognizeBuilderPtr = TIntrusivePtr<TMatchRecognizeBuilder>;
-private:
- const TString Name;
- const TVector<TNodePtr> Args;
-};
+TNodePtr BuildMatchRecognizeColumnAccess(TPosition pos, TString var, TString column);
+TNodePtr BuildMatchRecognizeDefineAggregate(TPosition pos, TString name, TVector<TNodePtr> args);
+TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, TNodePtr extractor);
} // namespace NSQLTranslationV1
-
diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp
index 2339fa894e..eea9069abf 100644
--- a/yql/essentials/sql/v1/node.cpp
+++ b/yql/essentials/sql/v1/node.cpp
@@ -706,6 +706,14 @@ TAstNode* TAstDirectNode::Translate(TContext& ctx) const {
return Node;
}
+TNodePtr BuildList(TPosition pos, TVector<TNodePtr> nodes) {
+ return new TAstListNodeImpl(pos, std::move(nodes));
+}
+
+TNodePtr BuildQuote(TPosition pos, TNodePtr expr) {
+ return BuildList(pos, {BuildAtom(pos, "quote", TNodeFlags::Default), expr});
+}
+
TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg) {
return new TAstAtomNodeImpl(pos, content, flags, isOptionalArg);
}
@@ -2669,10 +2677,6 @@ TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isL
return new TAccessNode(pos, ids, isLookup);
}
-TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar) {
- return new TMatchRecognizeVarAccessNode(pos, var, column, theSameVar);
-}
-
void WarnIfAliasFromSelectIsUsedInGroupBy(TContext& ctx, const TVector<TNodePtr>& selectTerms, const TVector<TNodePtr>& groupByTerms,
const TVector<TNodePtr>& groupByExprTerms)
{
diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h
index c9402a2f97..c197f7ac90 100644
--- a/yql/essentials/sql/v1/node.h
+++ b/yql/essentials/sql/v1/node.h
@@ -1437,6 +1437,8 @@ namespace NSQLTranslationV1 {
TString TypeByAlias(const TString& alias, bool normalize = true);
+ TNodePtr BuildList(TPosition pos, TVector<TNodePtr> nodes = {});
+ TNodePtr BuildQuote(TPosition pos, TNodePtr expr);
TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags = NYql::TNodeFlags::ArbitraryContent,
bool isOptionalArg = false);
TNodePtr BuildQuotedAtom(TPosition pos, const TString& content, ui32 flags = NYql::TNodeFlags::ArbitraryContent);
@@ -1471,7 +1473,6 @@ namespace NSQLTranslationV1 {
TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source = TString());
TNodePtr BuildColumnOrType(TPosition pos, const TString& column = TString());
TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup);
- TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar);
TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias);
TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName = TString());
TNodePtr BuildLambda(TPosition pos, TNodePtr params, const TVector<TNodePtr>& bodies);
diff --git a/yql/essentials/sql/v1/source.cpp b/yql/essentials/sql/v1/source.cpp
index 4231a9d370..c2fb13b103 100644
--- a/yql/essentials/sql/v1/source.cpp
+++ b/yql/essentials/sql/v1/source.cpp
@@ -288,7 +288,7 @@ inline TNodePtr ISource::AliasOrColumn(const TNodePtr& node, bool withSource) {
bool ISource::AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) {
if (ctx.DistinctOverWindow) {
- YQL_ENSURE(func->IsOverWindow() || func->IsOverWindowDistinct());
+ YQL_ENSURE(func->IsOverWindow() || func->IsOverWindowDistinct());
} else {
YQL_ENSURE(func->IsOverWindow());
if (func->IsDistinct()) {
diff --git a/yql/essentials/sql/v1/sql_expression.cpp b/yql/essentials/sql/v1/sql_expression.cpp
index c2074cb50d..be401949c7 100644
--- a/yql/essentials/sql/v1/sql_expression.cpp
+++ b/yql/essentials/sql/v1/sql_expression.cpp
@@ -860,24 +860,7 @@ TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) {
return result;
}
-TNodePtr MatchRecognizeVarAccess(TTranslation& ctx, const TString& var, const TRule_an_id_or_type& suffix, bool theSameVar) {
- switch (suffix.GetAltCase()) {
- case TRule_an_id_or_type::kAltAnIdOrType1:
- break;
- case TRule_an_id_or_type::kAltAnIdOrType2:
- break;
- case TRule_an_id_or_type::ALT_NOT_SET:
- break;
- }
- const auto& column = Id(
- suffix.GetAlt_an_id_or_type1()
- .GetRule_id_or_type1().GetAlt_id_or_type1().GetRule_id1(),
- ctx
- );
- return BuildMatchRecognizeVarAccess(TPosition{}, var, column, theSameVar);
-}
-
-TNodePtr TSqlExpression::RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block) {
+TNodePtr TSqlExpression::RowPatternVarAccess(TString var, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block) {
switch (block.GetAltCase()) {
case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1:
break;
@@ -888,13 +871,10 @@ TNodePtr TSqlExpression::RowPatternVarAccess(const TString& alias, const TRule_u
case TRule_an_id_or_type::kAltAnIdOrType1: {
const auto &idOrType = block.GetAlt3().GetRule_an_id_or_type1().GetAlt_an_id_or_type1().GetRule_id_or_type1();
switch(idOrType.GetAltCase()) {
- case TRule_id_or_type::kAltIdOrType1:
- return BuildMatchRecognizeVarAccess(
- Ctx.Pos(),
- alias,
- Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this),
- Ctx.GetMatchRecognizeDefineVar() == alias
- );
+ case TRule_id_or_type::kAltIdOrType1: {
+ const auto column = Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this);
+ return BuildMatchRecognizeColumnAccess(Ctx.Pos(), std::move(var), std::move(column));
+ }
case TRule_id_or_type::kAltIdOrType2:
break;
case TRule_id_or_type::ALT_NOT_SET:
@@ -911,7 +891,7 @@ TNodePtr TSqlExpression::RowPatternVarAccess(const TString& alias, const TRule_u
case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
Y_ABORT("You should change implementation according to grammar changes");
}
- return TNodePtr{};
+ return {};
}
template<typename TUnaryCasualExprRule>
@@ -991,13 +971,12 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const
case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
// In case of MATCH_RECOGNIZE lambdas
// X.Y is treated as Var.Column access
- if (isColumnRef && EColumnRefState::MatchRecognize == Ctx.GetColumnReferenceState()) {
- if (auto rowPatternVarAccess = RowPatternVarAccess(
- name,
- b.GetAlt3().GetBlock2())
- ) {
- return rowPatternVarAccess;
- }
+ if (isColumnRef && (
+ EColumnRefState::MatchRecognizeMeasures == Ctx.GetColumnReferenceState() ||
+ EColumnRefState::MatchRecognizeDefine == Ctx.GetColumnReferenceState() ||
+ EColumnRefState::MatchRecognizeDefineAggregate == Ctx.GetColumnReferenceState()
+ )) {
+ return RowPatternVarAccess(std::move(name), b.GetAlt3().GetBlock2());
}
break;
}
diff --git a/yql/essentials/sql/v1/sql_expression.h b/yql/essentials/sql/v1/sql_expression.h
index 4f9100722c..adcf44b11b 100644
--- a/yql/essentials/sql/v1/sql_expression.h
+++ b/yql/essentials/sql/v1/sql_expression.h
@@ -109,7 +109,7 @@ private:
TNodePtr BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const;
- TNodePtr RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block);
+ TNodePtr RowPatternVarAccess(TString var, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block);
struct TCaseBranch {
TNodePtr Pred;
diff --git a/yql/essentials/sql/v1/sql_match_recognize.cpp b/yql/essentials/sql/v1/sql_match_recognize.cpp
index 41415b7f23..6fcd56f4d6 100644
--- a/yql/essentials/sql/v1/sql_match_recognize.cpp
+++ b/yql/essentials/sql/v1/sql_match_recognize.cpp
@@ -1,367 +1,467 @@
#include "sql_match_recognize.h"
+
#include "node.h"
#include "sql_expression.h"
+
#include <yql/essentials/core/sql_types/match_recognize.h>
-#include <algorithm>
namespace NSQLTranslationV1 {
-using namespace NSQLv1Generated;
-
-namespace {
-
-TPosition TokenPosition(const TToken& token){
- return TPosition{token.GetColumn(), token.GetLine()};
-}
-
-TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecognizeClause& ctx){
- return Id(node.GetRule_identifier1(), ctx);
-}
-
-} //namespace
+TSqlMatchRecognizeClause::TSqlMatchRecognizeClause(TContext& ctx, NSQLTranslation::ESqlMode mode) : TSqlTranslation(ctx, mode) {}
TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) {
- TPosition pos(matchRecognizeClause.GetToken1().GetColumn(), matchRecognizeClause.GetToken1().GetLine());
+ auto pos = GetPos(matchRecognizeClause.GetToken1());
if (!Ctx.FeatureR010) {
Ctx.Error(pos, TIssuesIds::CORE) << "Unexpected MATCH_RECOGNIZE";
return {};
}
- TVector<TNamedFunction> partitioners;
- TPosition partitionsPos = pos;
- if (matchRecognizeClause.HasBlock3()) {
- const auto& partitionClause = matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1();
- partitionsPos = TokenPosition(partitionClause.GetToken1());
- partitioners = ParsePartitionBy(partitionClause);
- if (!partitioners)
- return {};
- }
- TVector<TSortSpecificationPtr> sortSpecs;
- TPosition orderByPos = pos;
- if (matchRecognizeClause.HasBlock4()) {
- const auto& orderByClause = matchRecognizeClause.GetBlock4().GetRule_order_by_clause1();
- orderByPos = TokenPosition(orderByClause.GetToken1());
- if (!OrderByClause(orderByClause, sortSpecs)) {
- return {};
- }
- }
- TPosition measuresPos = pos;
- TVector<TNamedFunction> measures;
- if (matchRecognizeClause.HasBlock5()) {
- const auto& measuresClause = matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1();
- measuresPos = TokenPosition(measuresClause.GetToken1());
- measures = ParseMeasures(measuresClause.GetRule_row_pattern_measure_list2());
+ auto [partitionKeySelector, partitionColumns] = ParsePartitionBy(
+ pos,
+ matchRecognizeClause.HasBlock3()
+ ? std::addressof(matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1())
+ : nullptr
+ );
+
+ auto sortSpecs = ParseOrderBy(
+ matchRecognizeClause.HasBlock4()
+ ? std::addressof(matchRecognizeClause.GetBlock4().GetRule_order_by_clause1())
+ : nullptr
+ );
+ if (!sortSpecs) {
+ return {};
}
- auto rowsPerMatch = std::pair {pos, NYql::NMatchRecognize::ERowsPerMatch::OneRow};
- if (matchRecognizeClause.HasBlock6()) {
- rowsPerMatch = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1());
+ auto measures = ParseMeasures(
+ matchRecognizeClause.HasBlock5()
+ ? std::addressof(matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1().GetRule_row_pattern_measure_list2())
+ : nullptr
+ );
+
+ auto rowsPerMatch = ParseRowsPerMatch(
+ pos,
+ matchRecognizeClause.HasBlock6()
+ ? std::addressof(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1())
+ : nullptr
+ );
+ if (!rowsPerMatch) {
+ return {};
}
const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7();
-
if (commonSyntax.HasBlock2()) {
const auto& initialOrSeek = commonSyntax.GetBlock2().GetRule_row_pattern_initial_or_seek1();
- Ctx.Error(TokenPosition(initialOrSeek.GetToken1())) << "InitialOrSeek subclause is not allowed in FROM clause";
+ Ctx.Error(GetPos(initialOrSeek.GetToken1())) << "InitialOrSeek subclause is not allowed in FROM clause";
return {};
}
- auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5());
- const auto& patternPos = TokenPosition(commonSyntax.token3());
-
- //this block is located before pattern block in grammar,
- // but depends on it, so it is processed after pattern block
- std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> skipTo {
- pos,
- NYql::NMatchRecognize::TAfterMatchSkipTo{
- NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow,
- TString()
- }
- };
- if (commonSyntax.HasBlock1()){
- skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3());
- const auto varRequired =
- NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst == skipTo.second.To ||
- NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast == skipTo.second.To ||
- NYql::NMatchRecognize::EAfterMatchSkipTo::To == skipTo.second.To;
- if (varRequired) {
- const auto& allVars = NYql::NMatchRecognize::GetPatternVars(pattern);
- if (allVars.find(skipTo.second.Var) == allVars.cend()) {
- Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH";
- return {};
- }
- }
+ PatternVarNames.clear();
+ PatternVars = BuildList(pos);
+ auto pattern = ParsePattern(pos, commonSyntax.GetRule_row_pattern5(), 0, true);
+ if (!pattern) {
+ return {};
}
+ auto skipTo = ParseAfterMatchSkipTo(
+ pos,
+ commonSyntax.HasBlock1()
+ ? std::addressof(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3())
+ : nullptr
+ );
+ if (!skipTo) {
+ return {};
+ }
- TNodePtr subset;
- TPosition subsetPos = pos;
- if (commonSyntax.HasBlock7()) {
- const auto& rowPatternSubset = commonSyntax.GetBlock7().GetRule_row_pattern_subset_clause1();
- subsetPos = TokenPosition(rowPatternSubset.GetToken1());
- Ctx.Error() << "SUBSET is not implemented yet";
- //TODO https://st.yandex-team.ru/YQL-16225
+ auto subset = ParseSubset(
+ pos,
+ commonSyntax.HasBlock7()
+ ? std::addressof(commonSyntax.GetBlock7().GetRule_row_pattern_subset_clause1())
+ : nullptr
+ );
+ if (!subset) {
return {};
}
- const auto& definitions = ParseDefinitions(commonSyntax.GetRule_row_pattern_definition_list9());
- const auto& definitionsPos = TokenPosition(commonSyntax.GetToken8());
- const auto& rowPatternVariables = GetPatternVars(pattern);
+ auto definitions = ParseDefinitions(commonSyntax.GetRule_row_pattern_definition_list9());
for (const auto& [callable, name]: definitions) {
- if (!rowPatternVariables.contains(name)) {
+ if (!PatternVarNames.contains(name)) {
Ctx.Error(callable->GetPos()) << "ROW PATTERN VARIABLE " << name << " is defined, but not mentioned in the PATTERN";
return {};
}
}
- return new TMatchRecognizeBuilder{
+ return new TMatchRecognizeBuilder(
pos,
- std::pair{partitionsPos, std::move(partitioners)},
- std::pair{orderByPos, std::move(sortSpecs)},
- std::pair{measuresPos, measures},
+ std::move(partitionKeySelector),
+ std::move(partitionColumns),
+ std::move(*sortSpecs),
+ std::move(measures),
std::move(rowsPerMatch),
std::move(skipTo),
- std::pair{patternPos, std::move(pattern)},
- std::pair{subsetPos, std::move(subset)},
- std::pair{definitionsPos, std::move(definitions)}
- };
-
+ std::move(pattern),
+ std::move(PatternVars),
+ std::move(*subset),
+ std::move(definitions)
+ );
+}
+std::tuple<TNodePtr, TNodePtr> TSqlMatchRecognizeClause::ParsePartitionBy(TPosition pos, const TRule_window_partition_clause* node) {
+ auto [partitionKeySelector, partitionColumns] = [&]() -> std::tuple<TNodePtr, TNodePtr> {
+ auto partitionKeySelector = BuildList(pos);
+ auto partitionColumns = BuildList(pos);
+ if (!node) {
+ return {partitionKeySelector, partitionColumns};
+ }
+ TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+ TVector<TNodePtr> partitionExprs;
+ if (!NamedExprList(node->GetRule_named_expr_list4(), partitionExprs)) {
+ return {partitionKeySelector, partitionColumns};
+ }
+ for (const auto& p : partitionExprs) {
+ auto label = p->GetLabel();
+ if (!label && p->GetColumnName()) {
+ label = *p->GetColumnName();
+ }
+ partitionKeySelector->Add(p);
+ partitionColumns->Add(BuildQuotedAtom(p->GetPos(), label));
+ }
+ return {partitionKeySelector, partitionColumns};
+ }();
+ return {
+ BuildLambda(pos, BuildList(pos, {BuildAtom(pos, "row")}), BuildQuote(pos, std::move(partitionKeySelector))),
+ BuildQuote(pos, std::move(partitionColumns))
+ };
}
-TVector<TNamedFunction> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_window_partition_clause& partitionClause) {
- TColumnRefScope scope(Ctx, EColumnRefState::Allow);
- TVector<TNodePtr> partitionExprs;
- if (!NamedExprList(
- partitionClause.GetRule_named_expr_list4(),
- partitionExprs)) {
- return {};
+TMaybe<TVector<TSortSpecificationPtr>> TSqlMatchRecognizeClause::ParseOrderBy(const TRule_order_by_clause* node) {
+ if (!node) {
+ return TVector<TSortSpecificationPtr>{};
}
- TVector<TNamedFunction> partitioners;
- for (const auto& p: partitionExprs) {
- auto label = p->GetLabel();
- if (!label && p->GetColumnName()) {
- label = *p->GetColumnName();
- }
- partitioners.push_back(TNamedFunction{p, label});
+ TVector<TSortSpecificationPtr> result;
+ if (!OrderByClause(*node, result)) {
+ return {};
}
- return partitioners;
+ return result;
}
TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_measure_definition& node) {
- TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize);
- const auto& expr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1());
- const auto& name = Id(node.GetRule_an_id3(), *this);
- //Each measure must be a lambda, that accepts 2 args:
+ TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognizeMeasures);
+ auto callable = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1());
+ auto measureName = Id(node.GetRule_an_id3(), *this);
+ // Each measure must be a lambda, that accepts 2 args:
// - List<InputTableColumns + _yql_Classifier, _yql_MatchNumber>
// - Struct that maps row pattern variables to ranges in the queue
- return {expr, name};
+ return {std::move(callable), std::move(measureName)};
}
-TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list& node) {
- TVector<TNamedFunction> result{ ParseOneMeasure(node.GetRule_row_pattern_measure_definition1()) };
- for (const auto& m: node.GetBlock2()) {
+TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list* node) {
+ if (!node) {
+ return {};
+ }
+ TVector<TNamedFunction> result{ParseOneMeasure(node->GetRule_row_pattern_measure_definition1())};
+ for (const auto& m: node->GetBlock2()) {
result.push_back(ParseOneMeasure(m.GetRule_row_pattern_measure_definition2()));
}
return result;
}
-std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) {
-
- switch(rowsPerMatchClause.GetAltCase()) {
- case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1:
- return std::pair {
- TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match1().GetToken1()),
- NYql::NMatchRecognize::ERowsPerMatch::OneRow
- };
- case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2:
- return std::pair {
- TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match2().GetToken1()),
- NYql::NMatchRecognize::ERowsPerMatch::AllRows
- };
+TNodePtr TSqlMatchRecognizeClause::ParseRowsPerMatch(TPosition pos, const TRule_row_pattern_rows_per_match* node) {
+ const auto result = [&]() -> NYql::NMatchRecognize::ERowsPerMatch {
+ if (!node) {
+ return NYql::NMatchRecognize::ERowsPerMatch::OneRow;
+ }
+ switch (node->GetAltCase()) {
+ case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1: {
+ const auto& rowsPerMatch = node->GetAlt_row_pattern_rows_per_match1();
+ pos = GetPos(rowsPerMatch.GetToken1());
+ return NYql::NMatchRecognize::ERowsPerMatch::OneRow;
+ }
+ case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2: {
+ const auto& rowsPerMatch = node->GetAlt_row_pattern_rows_per_match2();
+ pos = GetPos(rowsPerMatch.GetToken1());
+ return NYql::NMatchRecognize::ERowsPerMatch::AllRows;
+ }
case TRule_row_pattern_rows_per_match::ALT_NOT_SET:
Y_ABORT("You should change implementation according to grammar changes");
- }
+ }
+ }();
+ return BuildQuotedAtom(pos, "RowsPerMatch_" + ToString(result));
}
-std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause) {
- switch (skipToClause.GetAltCase()) {
- case TRule_row_pattern_skip_to::kAltRowPatternSkipTo1:
- return std::pair{
- TokenPosition(skipToClause.GetAlt_row_pattern_skip_to1().GetToken1()),
- NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::NextRow, ""}
- };
- case TRule_row_pattern_skip_to::kAltRowPatternSkipTo2:
- return std::pair{
- TokenPosition(skipToClause.GetAlt_row_pattern_skip_to2().GetToken1()),
- NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""}
- };
- case TRule_row_pattern_skip_to::kAltRowPatternSkipTo3:
- return std::pair{
- TokenPosition(skipToClause.GetAlt_row_pattern_skip_to3().GetToken1()),
- NYql::NMatchRecognize::TAfterMatchSkipTo{
- NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst,
- skipToClause.GetAlt_row_pattern_skip_to3().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue()
- }
- };
- case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4:
- return std::pair{
- TokenPosition(skipToClause.GetAlt_row_pattern_skip_to4().GetToken1()),
- NYql::NMatchRecognize::TAfterMatchSkipTo{
- NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast,
- skipToClause.GetAlt_row_pattern_skip_to4().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue()
- }
- };
- case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5:
- return std::pair{
- TokenPosition(skipToClause.GetAlt_row_pattern_skip_to5().GetToken1()),
- NYql::NMatchRecognize::TAfterMatchSkipTo{
- NYql::NMatchRecognize::EAfterMatchSkipTo::To,
- skipToClause.GetAlt_row_pattern_skip_to5().GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue()
- }
- };
+TNodePtr TSqlMatchRecognizeClause::ParseAfterMatchSkipTo(TPosition pos, const TRule_row_pattern_skip_to* node) {
+ auto skipToPos = pos;
+ auto varPos = pos;
+ const auto result = [&]() -> TMaybe<NYql::NMatchRecognize::TAfterMatchSkipTo> {
+ if (!node) {
+ return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""};
+ }
+ switch (node->GetAltCase()) {
+ case TRule_row_pattern_skip_to::kAltRowPatternSkipTo1: {
+ const auto& skipTo = node->GetAlt_row_pattern_skip_to1();
+ skipToPos = GetPos(skipTo.GetToken1());
+ return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::NextRow, ""};
+ }
+ case TRule_row_pattern_skip_to::kAltRowPatternSkipTo2: {
+ const auto& skipTo = node->GetAlt_row_pattern_skip_to2();
+ skipToPos = GetPos(skipTo.GetToken1());
+ return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""};
+ }
+ case TRule_row_pattern_skip_to::kAltRowPatternSkipTo3: {
+ const auto& skipTo = node->GetAlt_row_pattern_skip_to3();
+ skipToPos = GetPos(skipTo.GetToken1());
+ const auto& identifier = skipTo.GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1();
+ auto var = identifier.GetToken1().GetValue();
+ varPos = GetPos(identifier.GetToken1());
+ if (!PatternVarNames.contains(var)) {
+ Ctx.Error(varPos) << "Unknown pattern variable in AFTER MATCH SKIP TO FIRST";
+ return {};
+ }
+ return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst, std::move(var)};
+ }
+ case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4: {
+ const auto& skipTo = node->GetAlt_row_pattern_skip_to4();
+ skipToPos = GetPos(skipTo.GetToken1());
+ const auto& identifier = skipTo.GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1();
+ auto var = identifier.GetToken1().GetValue();
+ varPos = GetPos(identifier.GetToken1());
+ if (!PatternVarNames.contains(var)) {
+ Ctx.Error(varPos) << "Unknown pattern variable in AFTER MATCH SKIP TO LAST";
+ return {};
+ }
+ return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast, std::move(var)};
+ }
+ case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5: {
+ const auto& skipTo = node->GetAlt_row_pattern_skip_to5();
+ skipToPos = GetPos(skipTo.GetToken1());
+ const auto& identifier = skipTo.GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1();
+ auto var = identifier.GetToken1().GetValue();
+ varPos = GetPos(identifier.GetToken1());
+ if (!PatternVarNames.contains(var)) {
+ Ctx.Error(varPos) << "Unknown pattern variable in AFTER MATCH SKIP TO";
+ return {};
+ }
+ return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::To, std::move(var)};
+ }
case TRule_row_pattern_skip_to::ALT_NOT_SET:
Y_ABORT("You should change implementation according to grammar changes");
+ }
+ }();
+ if (!result) {
+ return {};
}
+ return BuildTuple(pos, {
+ BuildQuotedAtom(skipToPos, "AfterMatchSkip_" + ToString(result->To)),
+ BuildQuotedAtom(varPos, std::move(result->Var))
+ });
}
-NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool outputArg) {
- NYql::NMatchRecognize::TRowPatternTerm term;
- TPosition pos;
- for (const auto& factor: node.GetBlock1()) {
- const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1();
- NYql::NMatchRecognize::TRowPatternPrimary primary;
- bool output = outputArg;
- switch (primaryVar.GetAltCase()) {
- case TRule_row_pattern_primary::kAltRowPatternPrimary1:
- primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this);
- break;
- case TRule_row_pattern_primary::kAltRowPatternPrimary2:
- primary = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue();
- Y_ENSURE("$" == std::get<0>(primary));
- break;
- case TRule_row_pattern_primary::kAltRowPatternPrimary3:
- primary = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue();
- Y_ENSURE("^" == std::get<0>(primary));
- break;
- case TRule_row_pattern_primary::kAltRowPatternPrimary4: {
- if (patternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) {
- primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1(), patternNestingLevel + 1, output);
- } else {
- Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
- << "To big nesting level in the pattern";
- return NYql::NMatchRecognize::TRowPatternTerm{};
- }
- break;
+TNodePtr TSqlMatchRecognizeClause::BuildPatternFactor(TPosition pos, TNodePtr primary, std::tuple<ui64, ui64, bool, bool, bool> quantifier) {
+ return std::apply([&](const auto& ...args) {
+ return BuildTuple(pos, {std::move(primary), BuildQuotedAtom(pos, ToString(args))...});
+ }, quantifier);
+}
+
+TNodePtr TSqlMatchRecognizeClause::ParsePatternFactor(TPosition pos, const TRule_row_pattern_factor& node, size_t nestingLevel, bool output) {
+ if (nestingLevel > MaxPatternNesting) {
+ Ctx.Error(pos) << "To big nesting level in the pattern";
+ return {};
+ }
+ auto primary = [&]() -> TNodePtr {
+ const auto& primaryAlt = node.GetRule_row_pattern_primary1();
+ switch (primaryAlt.GetAltCase()) {
+ case TRule_row_pattern_primary::kAltRowPatternPrimary1: {
+ const auto& primary = primaryAlt.GetAlt_row_pattern_primary1();
+ const auto& identifier = primary.GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1().GetRule_identifier1();
+ const auto varName = Id(identifier, *this);
+ const auto var = BuildQuotedAtom(GetPos(identifier.GetToken1()), varName);
+ if (PatternVarNames.insert(varName).second) {
+ PatternVars->Add(var);
}
- case TRule_row_pattern_primary::kAltRowPatternPrimary5:
- output = false;
- primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary5().GetRule_row_pattern3(), patternNestingLevel + 1, output);
- break;
- case TRule_row_pattern_primary::kAltRowPatternPrimary6: {
- std::vector<NYql::NMatchRecognize::TRowPatternPrimary> items{ParsePattern(
- primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3(), patternNestingLevel + 1, output)
- };
- for (const auto& p: primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetBlock4()) {
- items.push_back(ParsePattern(p.GetRule_row_pattern2(), patternNestingLevel + 1, output));
- }
- //Permutations now is a syntactic sugar and converted to all possible alternatives
- if (items.size() > NYql::NMatchRecognize::MaxPermutedItems) {
- Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1()))
- << "Too many items in permute";
- return NYql::NMatchRecognize::TRowPatternTerm{};
- }
- std::vector<size_t> indexes(items.size());
- std::generate(begin(indexes), end(indexes), [n = 0] () mutable { return n++; });
- NYql::NMatchRecognize::TRowPattern permuted;
- do {
- NYql::NMatchRecognize::TRowPatternTerm term;
- term.reserve(indexes.size());
- for (size_t i = 0; i != indexes.size(); ++i) {
- term.push_back({items[indexes[i]], 1, 1, true, false, false});
- }
- permuted.push_back(std::move(term));
- } while (std::next_permutation(indexes.begin(), indexes.end()));
- primary = permuted;
- break;
+ return var;
+ }
+ case TRule_row_pattern_primary::kAltRowPatternPrimary2: {
+ const auto& primary = primaryAlt.GetAlt_row_pattern_primary2();
+ const auto& token = primary.GetToken1();
+ const auto varName = token.GetValue();
+ const auto var = BuildQuotedAtom(GetPos(token), varName);
+ if (PatternVarNames.insert(varName).second) {
+ PatternVars->Add(var);
}
- case TRule_row_pattern_primary::ALT_NOT_SET:
- Y_ABORT("You should change implementation according to grammar changes");
+ return var;
}
- uint64_t quantityMin = 1;
- uint64_t quantityMax = 1;
- constexpr uint64_t infinity = std::numeric_limits<uint64_t>::max();
- bool greedy = true;
- if (factor.GetRule_row_pattern_factor1().HasBlock2()) {
- const auto& quantifier = factor.GetRule_row_pattern_factor1().GetBlock2().GetRule_row_pattern_quantifier1();
- switch(quantifier.GetAltCase()){
- case TRule_row_pattern_quantifier::kAltRowPatternQuantifier1: //*
- quantityMin = 0;
- quantityMax = infinity;
- greedy = !quantifier.GetAlt_row_pattern_quantifier1().HasBlock2();
- break;
- case TRule_row_pattern_quantifier::kAltRowPatternQuantifier2: //+
- quantityMax = infinity;
- greedy = !quantifier.GetAlt_row_pattern_quantifier2().HasBlock2();
- break;
- case TRule_row_pattern_quantifier::kAltRowPatternQuantifier3: //?
- quantityMin = 0;
- greedy = !quantifier.GetAlt_row_pattern_quantifier3().HasBlock2();
- break;
- case TRule_row_pattern_quantifier::kAltRowPatternQuantifier4: //{ 2?, 4?}
- if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock2()) {
- quantityMin = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock2().GetRule_integer1().GetToken1().GetValue());
- }
- else {
- quantityMin = 0;;
- }
- if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock4()) {
- quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock4().GetRule_integer1().GetToken1().GetValue());
- }
- else {
- quantityMax = infinity;
- }
- greedy = !quantifier.GetAlt_row_pattern_quantifier4().HasBlock6();
-
- break;
- case TRule_row_pattern_quantifier::kAltRowPatternQuantifier5:
- quantityMin = quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier5().GetRule_integer2().GetToken1().GetValue());
- break;
- case TRule_row_pattern_quantifier::ALT_NOT_SET:
- Y_ABORT("You should change implementation according to grammar changes");
+ case TRule_row_pattern_primary::kAltRowPatternPrimary3: {
+ const auto& primary = primaryAlt.GetAlt_row_pattern_primary3();
+ const auto& token = primary.GetToken1();
+ const auto varName = token.GetValue();
+ const auto var = BuildQuotedAtom(GetPos(token), varName);
+ if (PatternVarNames.insert(varName).second) {
+ PatternVars->Add(var);
}
+ return var;
+ }
+ case TRule_row_pattern_primary::kAltRowPatternPrimary4: {
+ const auto& primary = primaryAlt.GetAlt_row_pattern_primary4();
+ return ParsePattern(pos, primary.GetBlock2().GetRule_row_pattern1(), nestingLevel + 1, output);
+ }
+ case TRule_row_pattern_primary::kAltRowPatternPrimary5: {
+ const auto& primary = primaryAlt.GetAlt_row_pattern_primary5();
+ output = false;
+ return ParsePattern(pos, primary.GetRule_row_pattern3(), nestingLevel + 1, output);
+ }
+ case TRule_row_pattern_primary::kAltRowPatternPrimary6: {
+ const auto& primary = primaryAlt.GetAlt_row_pattern_primary6();
+ std::vector<TNodePtr> items{
+ ParsePattern(pos, primary.GetRule_row_pattern_permute1().GetRule_row_pattern3(), nestingLevel + 1, output)
+ };
+ for (const auto& p: primary.GetRule_row_pattern_permute1().GetBlock4()) {
+ items.push_back(ParsePattern(pos, p.GetRule_row_pattern2(), nestingLevel + 1, output));
+ }
+ if (items.size() > MaxPermutedItems) {
+ Ctx.Error(GetPos(primary.GetRule_row_pattern_permute1().GetToken1())) << "Too many items in permute";
+ return {};
+ }
+ std::vector<size_t> indexes(items.size());
+ Iota(indexes.begin(), indexes.end(), 0);
+ std::vector<TNodePtr> result;
+ do {
+ std::vector<TNodePtr> term;
+ term.reserve(items.size());
+ for (auto index : indexes) {
+ term.push_back(BuildPatternFactor(pos, items[index], std::tuple{1, 1, true, output, false}));
+ }
+ result.push_back(BuildPatternTerm(pos, std::move(term)));
+ } while (std::next_permutation(indexes.begin(), indexes.end()));
+ return BuildPattern(pos, std::move(result));
+ }
+ case TRule_row_pattern_primary::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
}
- term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output, false});
+ }();
+ if (!primary) {
+ return {};
}
- return term;
+
+ const auto quantifier = [&]() {
+ if (!node.HasBlock2()) {
+ const auto quantity = static_cast<ui64>(1);
+ return std::tuple{quantity, quantity, true, output, false};
+ }
+ const auto& quantifierAlt = node.GetBlock2().GetRule_row_pattern_quantifier1();
+ switch (quantifierAlt.GetAltCase()) {
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier1: { // *
+ const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier1();
+ pos = GetPos(quantifier.GetToken1());
+ return std::tuple{static_cast<ui64>(0), static_cast<ui64>(Max()), !quantifier.HasBlock2(), output, false};
+ }
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier2: { // +
+ const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier2();
+ pos = GetPos(quantifier.GetToken1());
+ return std::tuple{static_cast<ui64>(1), static_cast<ui64>(Max()), !quantifier.HasBlock2(), output, false};
+ }
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier3: { // ?
+ const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier3();
+ pos = GetPos(quantifier.GetToken1());
+ return std::tuple{static_cast<ui64>(0), static_cast<ui64>(1), !quantifier.HasBlock2(), output, false};
+ }
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier4: { // {n?, m?}
+ const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier4();
+ pos = GetPos(quantifier.GetToken1());
+ return std::tuple{
+ quantifier.HasBlock2()
+ ? FromString(quantifier.GetBlock2().GetRule_integer1().GetToken1().GetValue())
+ : static_cast<ui64>(0),
+ quantifier.HasBlock4()
+ ? FromString(quantifier.GetBlock4().GetRule_integer1().GetToken1().GetValue())
+ : static_cast<ui64>(Max()),
+ !quantifier.HasBlock6(),
+ output,
+ false
+ };
+ }
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier5: { // {n}
+ const auto quantifier = quantifierAlt.GetAlt_row_pattern_quantifier5();
+ pos = GetPos(quantifier.GetToken1());
+ const auto quantity = static_cast<ui64>(FromString(quantifier.GetRule_integer2().GetToken1().GetValue()));
+ return std::tuple{quantity, quantity, true, output, false};
+ }
+ case TRule_row_pattern_quantifier::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+ }();
+ return BuildPatternFactor(pos, std::move(primary), std::move(quantifier));
}
-NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel, bool output){
- TVector<NYql::NMatchRecognize::TRowPatternTerm> result;
- result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1(), patternNestingLevel, output));
- for (const auto& term: node.GetBlock2())
- result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2(), patternNestingLevel, output));
+TNodePtr TSqlMatchRecognizeClause::BuildPatternTerm(TPosition pos, std::vector<TNodePtr> term) {
+ auto result = BuildList(pos);
+ for (auto& factor : term) {
+ if (!factor) {
+ return {};
+ }
+ result->Add(std::move(factor));
+ }
+ return BuildQuote(pos, std::move(result));
+}
+
+TNodePtr TSqlMatchRecognizeClause::ParsePatternTerm(TPosition pos, const TRule_row_pattern_term& node, size_t nestingLevel, bool output) {
+ std::vector<TNodePtr> result;
+ result.reserve(node.GetBlock1().size());
+ for (const auto& factor: node.GetBlock1()) {
+ result.push_back(ParsePatternFactor(pos, factor.GetRule_row_pattern_factor1(), nestingLevel, output));
+ }
+ return BuildPatternTerm(pos, std::move(result));
+}
+
+TNodePtr TSqlMatchRecognizeClause::BuildPattern(TPosition pos, std::vector<TNodePtr> pattern) {
+ const auto result = BuildList(pos, {BuildAtom(pos, "MatchRecognizePattern")});
+ for (auto& term: pattern) {
+ if (!term) {
+ return {};
+ }
+ result->Add(std::move(term));
+ }
return result;
}
-TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){
- const auto& varName = PatternVar(node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1(), *this);
- TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize, true, varName);
- const auto& searchCondition = TSqlExpression(Ctx, Mode).Build(node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1());
- return TNamedFunction{searchCondition, varName};
+TNodePtr TSqlMatchRecognizeClause::ParsePattern(TPosition pos, const TRule_row_pattern& node, size_t nestingLevel, bool output) {
+ std::vector<TNodePtr> result;
+ result.reserve(1 + node.GetBlock2().size());
+ result.push_back(ParsePatternTerm(pos, node.GetRule_row_pattern_term1(), nestingLevel, output));
+ for (const auto& term: node.GetBlock2()) {
+ result.push_back(ParsePatternTerm(pos, term.GetRule_row_pattern_term2(), nestingLevel, output));
+ }
+ return BuildPattern(pos, std::move(result));
+}
+
+TMaybe<TNodePtr> TSqlMatchRecognizeClause::ParseSubset(TPosition pos, const TRule_row_pattern_subset_clause* node) {
+ if (!node) {
+ return TNodePtr{};
+ }
+ pos = GetPos(node->GetToken1());
+ // TODO https://st.yandex-team.ru/YQL-16225
+ Ctx.Error(pos) << "SUBSET is not implemented yet";
+ return {};
+}
+
+TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node) {
+ const auto& identifier = node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1().GetRule_identifier1();
+ auto defineName = Id(identifier, *this);
+ TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognizeDefine, true, defineName);
+ const auto& searchCondition = node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1();
+ auto callable = TSqlExpression(Ctx, Mode).Build(searchCondition);
+ // Each define must be a predicate lambda, that accepts 3 args:
+ // - List<input table rows>
+ // - A struct that maps row pattern variables to ranges in the queue
+ // - An index of the current row
+ return {std::move(callable), std::move(defineName)};
}
TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) {
- TVector<TNamedFunction> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())};
+ TVector<TNamedFunction> result{ParseOneDefinition(node.GetRule_row_pattern_definition1())};
for (const auto& d: node.GetBlock2()) {
- //Each define must be a predicate lambda, that accepts 3 args:
- // - List<input table rows>
- // - A struct that maps row pattern variables to ranges in the queue
- // - An index of the current row
result.push_back(ParseOneDefinition(d.GetRule_row_pattern_definition2()));
}
return result;
}
-} //namespace NSQLTranslationV1
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_match_recognize.h b/yql/essentials/sql/v1/sql_match_recognize.h
index 219baeaa09..928ed35b5d 100644
--- a/yql/essentials/sql/v1/sql_match_recognize.h
+++ b/yql/essentials/sql/v1/sql_match_recognize.h
@@ -1,28 +1,38 @@
#pragma once
-#include "sql_translation.h"
#include "match_recognize.h"
+#include "node.h"
+#include "sql_translation.h"
namespace NSQLTranslationV1 {
-using namespace NSQLv1Generated;
-
-class TSqlMatchRecognizeClause: public TSqlTranslation {
+class TSqlMatchRecognizeClause final : public TSqlTranslation {
public:
- TSqlMatchRecognizeClause(TContext& ctx, NSQLTranslation::ESqlMode mode)
- : TSqlTranslation(ctx, mode)
- {}
+ TSqlMatchRecognizeClause(TContext& ctx, NSQLTranslation::ESqlMode mode);
TMatchRecognizeBuilderPtr CreateBuilder(const TRule_row_pattern_recognition_clause& node);
+ static constexpr size_t MaxPatternNesting = 20;
+ static constexpr size_t MaxPermutedItems = 6;
+
private:
- TVector<TNamedFunction> ParsePartitionBy(const TRule_window_partition_clause& partitionClause);
+ std::tuple<TNodePtr, TNodePtr> ParsePartitionBy(TPosition pos, const TRule_window_partition_clause* node);
+ TMaybe<TVector<TSortSpecificationPtr>> ParseOrderBy(const TRule_order_by_clause* node);
TNamedFunction ParseOneMeasure(const TRule_row_pattern_measure_definition& node);
- TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list& node);
- std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause);
- std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause);
- NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool output);
- NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel = 1, bool output = true);
+ TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list* node);
+ TNodePtr ParseRowsPerMatch(TPosition pos, const TRule_row_pattern_rows_per_match* node);
+ TNodePtr ParseAfterMatchSkipTo(TPosition pos, const TRule_row_pattern_skip_to* node);
+ TNodePtr BuildPatternFactor(TPosition pos, TNodePtr primary, std::tuple<ui64, ui64, bool, bool, bool> quantifier);
+ TNodePtr ParsePatternFactor(TPosition pos, const TRule_row_pattern_factor& node, size_t nestingLevel, bool output);
+ TNodePtr BuildPatternTerm(TPosition pos, std::vector<TNodePtr> term);
+ TNodePtr ParsePatternTerm(TPosition pos, const TRule_row_pattern_term& node, size_t nestingLevel, bool output);
+ TNodePtr BuildPattern(TPosition pos, std::vector<TNodePtr> pattern);
+ TNodePtr ParsePattern(TPosition pos, const TRule_row_pattern& node, size_t nestingLevel, bool output);
+ TMaybe<TNodePtr> ParseSubset(TPosition pos, const TRule_row_pattern_subset_clause* node);
TNamedFunction ParseOneDefinition(const TRule_row_pattern_definition& node);
TVector<TNamedFunction> ParseDefinitions(const TRule_row_pattern_definition_list& node);
+
+private:
+ THashSet<TString> PatternVarNames;
+ TNodePtr PatternVars;
};
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
index f591ef0647..f599dc6e99 100644
--- a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
+++ b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
@@ -41,7 +41,7 @@ bool IsLambda(const NYql::TAstNode* node, ui32 numberOfArgs) {
return false;
}
if (!node->GetChild(0)->IsAtom() || node->GetChild(0)->GetContent() != "lambda") {
- return false;
+ return false;
}
return IsQuotedListOfSize(node->GetChild(1), numberOfArgs);
}
@@ -71,7 +71,7 @@ FROM Input MATCH_RECOGNIZE(
auto matchRecognizeAndSample = R"(
USE plato;
SELECT *
-FROM Input MATCH_RECOGNIZE(
+FROM Input MATCH_RECOGNIZE(
PATTERN ( A )
DEFINE A as A
) TABLESAMPLE BERNOULLI(1.0)
@@ -148,13 +148,13 @@ FROM Input MATCH_RECOGNIZE(
auto r = MatchRecognizeSqlToYql(stmt);
UNIT_ASSERT(r.IsOk());
const auto measures = FindMatchRecognizeParam(r.Root, "measures");
- UNIT_ASSERT_VALUES_EQUAL(6, measures->GetChildrenCount());
+ UNIT_ASSERT_VALUES_EQUAL(7, measures->GetChildrenCount());
const auto columnNames = measures->GetChild(3);
UNIT_ASSERT(IsQuotedListOfSize(columnNames, 2));
UNIT_ASSERT_VALUES_EQUAL("T", columnNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
UNIT_ASSERT_VALUES_EQUAL("Key", columnNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
- UNIT_ASSERT(IsLambda(measures->GetChild(4), 2));
- UNIT_ASSERT(IsLambda(measures->GetChild(5), 2));
+ UNIT_ASSERT(IsQuotedListOfSize(measures->GetChild(4), 2));
+ UNIT_ASSERT(IsQuotedListOfSize(measures->GetChild(5), 2));
}
Y_UNIT_TEST(RowsPerMatch) {
{
@@ -326,7 +326,7 @@ USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
INITIAL
- PATTERN (A+ B* C?)
+ PATTERN (A+ B* C?)
DEFINE A as A
)
)";
@@ -340,7 +340,7 @@ USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
SEEK
- PATTERN (A+ B* C?)
+ PATTERN (A+ B* C?)
DEFINE A as A
)
)";
@@ -353,7 +353,7 @@ FROM Input MATCH_RECOGNIZE(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
- PATTERN (A+ B* C?)
+ PATTERN (A+ B* C?)
DEFINE A as A
)
)";
@@ -427,7 +427,7 @@ PATTERN (
}
Y_UNIT_TEST(PatternLimitedNesting) {
- const size_t MaxNesting = 20;
+ constexpr size_t MaxNesting = 20;
for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) {
std::string pattern;
for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
@@ -469,7 +469,7 @@ FROM Input MATCH_RECOGNIZE(
};
auto getTheFactor = [](const NYql::TAstNode* root) {
const auto& patternCallable = FindMatchRecognizeParam(root, "pattern");
- const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
+ const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
return NYql::NMatchRecognize::TRowPatternFactor{
TString(), //primary var or subexpression, not used in this test
FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin
@@ -651,7 +651,7 @@ FROM Input MATCH_RECOGNIZE(
}
Y_UNIT_TEST(PermuteTooMuch) {
- for (size_t n = 1; n <= NYql::NMatchRecognize::MaxPermutedItems + 1; ++n) {
+ for (size_t n = 1; n <= 6 + 1; ++n) {
std::vector<std::string> vars(n);
std::generate(begin(vars), end(vars), [n = 0] () mutable { return "A" + std::to_string(n++);});
const auto stmt = TString(R"(
@@ -671,7 +671,7 @@ FROM Input MATCH_RECOGNIZE(
)"
);
const auto &r = MatchRecognizeSqlToYql(stmt);
- if (n <= NYql::NMatchRecognize::MaxPermutedItems) {
+ if (n <= 6) {
UNIT_ASSERT(r.IsOk());
} else {
UNIT_ASSERT(!r.IsOk());
diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp
index 6bdd2c7e44..bd385c8feb 100644
--- a/yql/essentials/sql/v1/sql_ut.cpp
+++ b/yql/essentials/sql/v1/sql_ut.cpp
@@ -8162,3 +8162,42 @@ Y_UNIT_TEST_SUITE(QuerySplit) {
};)");
}
}
+
+Y_UNIT_TEST_SUITE(MatchRecognizeMeasuresAggregation) {
+ Y_UNIT_TEST(InsideSelect) {
+ ExpectFailWithError(R"sql(
+ SELECT FIRST(0), LAST(1);
+ )sql",
+ "<main>:2:20: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:30: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+
+ Y_UNIT_TEST(OutsideSelect) {
+ ExpectFailWithError(R"sql(
+ $lambda = ($x) -> (FIRST($x) + LAST($x));
+ SELECT $lambda(x) FROM plato.Input;
+ )sql",
+ "<main>:2:32: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:44: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+
+ Y_UNIT_TEST(AsAggregateFunction) {
+ ExpectFailWithError(R"sql(
+ SELECT FIRST(x), LAST(x) FROM plato.Input;
+ )sql",
+ "<main>:2:20: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:30: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+
+ Y_UNIT_TEST(AsWindowFunction) {
+ ExpectFailWithError(R"sql(
+ SELECT FIRST(x) OVER(), LAST(x) OVER() FROM plato.Input;
+ )sql",
+ "<main>:2:20: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:37: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+}
diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp
index 4b1233bfee..9463408886 100644
--- a/yql/essentials/sql/v1/sql_ut_antlr4.cpp
+++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp
@@ -8121,3 +8121,42 @@ $__ydb_transfer_lambda = ($x) -> {
}
}
+
+Y_UNIT_TEST_SUITE(MatchRecognizeMeasuresAggregation) {
+ Y_UNIT_TEST(InsideSelect) {
+ ExpectFailWithError(R"sql(
+ SELECT FIRST(0), LAST(1);
+ )sql",
+ "<main>:2:20: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:30: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+
+ Y_UNIT_TEST(OutsideSelect) {
+ ExpectFailWithError(R"sql(
+ $lambda = ($x) -> (FIRST($x) + LAST($x));
+ SELECT $lambda(x) FROM plato.Input;
+ )sql",
+ "<main>:2:32: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:44: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+
+ Y_UNIT_TEST(AsAggregateFunction) {
+ ExpectFailWithError(R"sql(
+ SELECT FIRST(x), LAST(x) FROM plato.Input;
+ )sql",
+ "<main>:2:20: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:30: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+
+ Y_UNIT_TEST(AsWindowFunction) {
+ ExpectFailWithError(R"sql(
+ SELECT FIRST(x) OVER(), LAST(x) OVER() FROM plato.Input;
+ )sql",
+ "<main>:2:20: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ "<main>:2:37: Error: Cannot use FIRST and LAST outside the MATCH_RECOGNIZE context\n"
+ );
+ }
+}
diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
index cecc1d216b..55b679803b 100644
--- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
@@ -696,9 +696,9 @@
],
"test.test[match_recognize-after_match_skip_past_last_row-default.txt-Debug]": [
{
- "checksum": "5f35ea2e92d57979f9c4bba6b5646a34",
- "size": 1152,
- "uri": "https://{canondata_backend}/1920236/0e4eb7607962ae8fb5d7c33255de8ebf2bb42fa1/resource.tar.gz#test.test_match_recognize-after_match_skip_past_last_row-default.txt-Debug_/opt.yql"
+ "checksum": "ba5829938cca0f9da11d044432ba58c4",
+ "size": 1294,
+ "uri": "https://{canondata_backend}/1777230/d5f34e6dd6caad9e61f6ac66cc95521cc05a75cf/resource.tar.gz#test.test_match_recognize-after_match_skip_past_last_row-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-after_match_skip_past_last_row-default.txt-Results]": [
@@ -710,9 +710,9 @@
],
"test.test[match_recognize-alerts-streaming-default.txt-Debug]": [
{
- "checksum": "4c0317b3fb6f4b79fec4d8b5d361a30a",
- "size": 5963,
- "uri": "https://{canondata_backend}/1936947/b760795ecc1ab66b9c70ac9ecfdf8a8ffa413f66/resource.tar.gz#test.test_match_recognize-alerts-streaming-default.txt-Debug_/opt.yql"
+ "checksum": "d84be5a785956f5561d3871d773bb117",
+ "size": 6021,
+ "uri": "https://{canondata_backend}/1777230/d5f34e6dd6caad9e61f6ac66cc95521cc05a75cf/resource.tar.gz#test.test_match_recognize-alerts-streaming-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-alerts-streaming-default.txt-Results]": [
@@ -724,16 +724,16 @@
],
"test.test[match_recognize-test_type-default.txt-Debug]": [
{
- "checksum": "2a2950eafdd72b695adee8a0d1deddab",
- "size": 2645,
- "uri": "https://{canondata_backend}/1936947/b760795ecc1ab66b9c70ac9ecfdf8a8ffa413f66/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql"
+ "checksum": "332b177bb25cf91a2a59f949bc30997a",
+ "size": 2667,
+ "uri": "https://{canondata_backend}/1777230/d5f34e6dd6caad9e61f6ac66cc95521cc05a75cf/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-test_type-default.txt-Results]": [
{
- "checksum": "02e86332916457371461fd277e2f0003",
- "size": 4370,
- "uri": "https://{canondata_backend}/1936947/b760795ecc1ab66b9c70ac9ecfdf8a8ffa413f66/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Results_/results.txt"
+ "checksum": "568681ef1c935d6bdeabf22735cc4c63",
+ "size": 5138,
+ "uri": "https://{canondata_backend}/1777230/d5f34e6dd6caad9e61f6ac66cc95521cc05a75cf/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Results_/results.txt"
}
],
"test.test[optimizers-or_absorption--Debug]": [
diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json
index 4ce5118f55..e9313a184a 100644
--- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json
@@ -694,9 +694,9 @@
],
"test.test[match_recognize-greedy_quantifiers-default.txt-Debug]": [
{
- "checksum": "f715209ae286a07b1b13eaa7a4e5bb74",
- "size": 2103,
- "uri": "https://{canondata_backend}/1920236/97f248ad438e8879d0fc75f582d39c0a52739159/resource.tar.gz#test.test_match_recognize-greedy_quantifiers-default.txt-Debug_/opt.yql"
+ "checksum": "66fb0a8ccd3814cb306c356fcecea0d1",
+ "size": 2147,
+ "uri": "https://{canondata_backend}/1777230/e0a11b3037046c8b251f0102af3c7cc58af5ef2c/resource.tar.gz#test.test_match_recognize-greedy_quantifiers-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-greedy_quantifiers-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
index 4c2099af44..39f596db22 100644
--- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
@@ -773,9 +773,9 @@
],
"test.test[match_recognize-simple_paritioning-default.txt-Debug]": [
{
- "checksum": "ed2ffd7ee602e50eb77df1f51835692d",
- "size": 3030,
- "uri": "https://{canondata_backend}/1773845/e276b05a77b889bd8fbb83a0bc0087dea7abd8f4/resource.tar.gz#test.test_match_recognize-simple_paritioning-default.txt-Debug_/opt.yql"
+ "checksum": "459d2a84dbc7335c291bc01ab9573c6e",
+ "size": 3201,
+ "uri": "https://{canondata_backend}/1937492/74169e2f8afb099b48d4cfe4c1190fcfe62c417a/resource.tar.gz#test.test_match_recognize-simple_paritioning-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-simple_paritioning-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json
index 02a5ceee0c..c4ada02ff9 100644
--- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json
@@ -894,6 +894,20 @@
"uri": "https://{canondata_backend}/1931696/ea5c7a58e2d64730a365efbdc95cdd2474857fcb/resource.tar.gz#test.test_like-like_rewrite-default.txt-Results_/results.txt"
}
],
+ "test.test[match_recognize-measures_aggregate-default.txt-Debug]": [
+ {
+ "checksum": "57fd3f8f46c4a0ab7bf9ef9a4842ea75",
+ "size": 5880,
+ "uri": "https://{canondata_backend}/1775059/fa3782efce42b73c9d2a91d53e640bb71168f31f/resource.tar.gz#test.test_match_recognize-measures_aggregate-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[match_recognize-measures_aggregate-default.txt-Results]": [
+ {
+ "checksum": "58ad2a60431fd720f1968f892142143e",
+ "size": 11093,
+ "uri": "https://{canondata_backend}/1775059/fa3782efce42b73c9d2a91d53e640bb71168f31f/resource.tar.gz#test.test_match_recognize-measures_aggregate-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[optimizers-coalesce_propagate-default.txt-Debug]": [
{
"checksum": "9ce2404a84d5d2a812bb99e30502c2a9",
diff --git a/yql/essentials/tests/sql/minirun/part5/canondata/result.json b/yql/essentials/tests/sql/minirun/part5/canondata/result.json
index 0fc1d285af..3bbd5eafd1 100644
--- a/yql/essentials/tests/sql/minirun/part5/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part5/canondata/result.json
@@ -1098,9 +1098,9 @@
],
"test.test[match_recognize-alerts-default.txt-Debug]": [
{
- "checksum": "17ab30106ff230f4c490cadb8641e3c8",
- "size": 5965,
- "uri": "https://{canondata_backend}/1936947/73a52333839d1b24061b13e03914d1ed469097ed/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql"
+ "checksum": "9f76da8aa8f0d280d8a98e4eb3f892b1",
+ "size": 6023,
+ "uri": "https://{canondata_backend}/1130705/e4b7d47d9742325daff68c922887a7262afbacba/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-alerts-default.txt-Results]": [
@@ -1112,9 +1112,9 @@
],
"test.test[match_recognize-all_rows_per_match-default.txt-Debug]": [
{
- "checksum": "94481db077832a3635f63f613be474dc",
- "size": 3913,
- "uri": "https://{canondata_backend}/1942278/297fcbf291f6e0fa588710a14ca3180cf33bc176/resource.tar.gz#test.test_match_recognize-all_rows_per_match-default.txt-Debug_/opt.yql"
+ "checksum": "d165424c12b99f87fd8aca943b72458e",
+ "size": 3852,
+ "uri": "https://{canondata_backend}/1130705/e4b7d47d9742325daff68c922887a7262afbacba/resource.tar.gz#test.test_match_recognize-all_rows_per_match-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-all_rows_per_match-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/minirun/part6/canondata/result.json b/yql/essentials/tests/sql/minirun/part6/canondata/result.json
index 088e4d2a31..268e02f868 100644
--- a/yql/essentials/tests/sql/minirun/part6/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part6/canondata/result.json
@@ -849,9 +849,9 @@
],
"test.test[match_recognize-alerts_without_order-default.txt-Debug]": [
{
- "checksum": "c724b7b2600d261b02e191689e728f28",
- "size": 5813,
- "uri": "https://{canondata_backend}/1936947/0ddea11997745cd059fbd894ec6ff75cd45330e7/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Debug_/opt.yql"
+ "checksum": "594f151e529e79595efe63067ad0cf67",
+ "size": 5900,
+ "uri": "https://{canondata_backend}/1775059/35c89b22136cd01420b6cc7ddf223a627fc65f0b/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-alerts_without_order-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
index 3adddc5fb4..900975682d 100644
--- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
@@ -644,16 +644,16 @@
],
"test.test[match_recognize-test_type-streaming-default.txt-Debug]": [
{
- "checksum": "5618af42a1caa66d16c80f966e94237f",
- "size": 2546,
- "uri": "https://{canondata_backend}/1936273/40a052daef6f43b6c825c32f19c5707a9107d4bc/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Debug_/opt.yql"
+ "checksum": "78f9d1127f6c378a383e746f8e7ca2b6",
+ "size": 2582,
+ "uri": "https://{canondata_backend}/1900335/96f474c83eaaa22aac9f78dae9544c91548b2f2f/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-test_type-streaming-default.txt-Results]": [
{
- "checksum": "a426ea78777873f86c83681342304e1a",
- "size": 4374,
- "uri": "https://{canondata_backend}/1936273/40a052daef6f43b6c825c32f19c5707a9107d4bc/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Results_/results.txt"
+ "checksum": "3c7b40f6b2281e1f94394cedd06f6f54",
+ "size": 5142,
+ "uri": "https://{canondata_backend}/1900335/96f474c83eaaa22aac9f78dae9544c91548b2f2f/resource.tar.gz#test.test_match_recognize-test_type-streaming-default.txt-Results_/results.txt"
}
],
"test.test[order_by-order_by_missing_project_column_nosimple--Debug]": [
diff --git a/yql/essentials/tests/sql/minirun/part8/canondata/result.json b/yql/essentials/tests/sql/minirun/part8/canondata/result.json
index de9da9c5b2..99429e3c85 100644
--- a/yql/essentials/tests/sql/minirun/part8/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part8/canondata/result.json
@@ -903,9 +903,9 @@
],
"test.test[match_recognize-permute-default.txt-Debug]": [
{
- "checksum": "58f5d288f2e3837a813be53a69b0595b",
- "size": 2421,
- "uri": "https://{canondata_backend}/1817427/e19dd6d5c506c6d882f6e409830437f1245b321b/resource.tar.gz#test.test_match_recognize-permute-default.txt-Debug_/opt.yql"
+ "checksum": "f33a6938ad6524015fe493f9e45a9a1a",
+ "size": 2300,
+ "uri": "https://{canondata_backend}/1900335/3c460c2946ea734b521d2f5710a7c3237ef371b3/resource.tar.gz#test.test_match_recognize-permute-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-permute-default.txt-Results]": [
@@ -917,9 +917,9 @@
],
"test.test[match_recognize-simple_paritioning-streaming-default.txt-Debug]": [
{
- "checksum": "5d5202ad383ce1d089d694cc81131011",
- "size": 3076,
- "uri": "https://{canondata_backend}/1773845/20507d42e246d2393b5ea9633b85501f042faf59/resource.tar.gz#test.test_match_recognize-simple_paritioning-streaming-default.txt-Debug_/opt.yql"
+ "checksum": "8e6443cf3e5f569f1e06dbf8207d334d",
+ "size": 3262,
+ "uri": "https://{canondata_backend}/1900335/3c460c2946ea734b521d2f5710a7c3237ef371b3/resource.tar.gz#test.test_match_recognize-simple_paritioning-streaming-default.txt-Debug_/opt.yql"
}
],
"test.test[match_recognize-simple_paritioning-streaming-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 7ea57dddf1..0d7fe3bcf9 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -4096,86 +4096,93 @@
],
"test_sql2yql.test[match_recognize-after_match_skip_past_last_row]": [
{
- "checksum": "060a3ed010fea646088a7a4494c4a505",
- "size": 3125,
- "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_match_recognize-after_match_skip_past_last_row_/sql.yql"
+ "checksum": "6455d6bae63fe3d95a749a7952863604",
+ "size": 3104,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-after_match_skip_past_last_row_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-alerts-streaming]": [
{
- "checksum": "19cfd4ac71802d49039da78644667d60",
- "size": 9768,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-alerts-streaming_/sql.yql"
+ "checksum": "55a3af0bf19e627ca6dec04367c179c1",
+ "size": 10044,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-alerts-streaming_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-alerts]": [
{
- "checksum": "08c72510d05ec8baa8050b2ab75175d3",
- "size": 9770,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_/sql.yql"
+ "checksum": "abe472c1e4dd4d38ce3af7954c0a4425",
+ "size": 10046,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-alerts_without_order]": [
{
- "checksum": "d8075bb34a86c528ce47ea2be97a4e86",
- "size": 9651,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_without_order_/sql.yql"
+ "checksum": "7e6cd1cda9ddc8a2fe0f41ace902517e",
+ "size": 9927,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_without_order_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-all_rows_per_match]": [
{
- "checksum": "ee193116db0e897c8a3fbb98a75caaac",
- "size": 6732,
- "uri": "https://{canondata_backend}/1889210/a0cc9e3113699f51443284ed56291923dfc3735d/resource.tar.gz#test_sql2yql.test_match_recognize-all_rows_per_match_/sql.yql"
+ "checksum": "4eca671d5cbce0a4457b1b7918ff3a7b",
+ "size": 6627,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-all_rows_per_match_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-greedy_quantifiers]": [
{
- "checksum": "41e90a3a986f9b2a7a36a83b918667cc",
- "size": 4352,
- "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_match_recognize-greedy_quantifiers_/sql.yql"
+ "checksum": "a37f9b7a1bbf0e3ab25d265bfb1cdf95",
+ "size": 4214,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-greedy_quantifiers_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[match_recognize-measures_aggregate]": [
+ {
+ "checksum": "a5c9c77b24db505963aa1c7af3e9e9ba",
+ "size": 7401,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-measures_aggregate_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-permute]": [
{
- "checksum": "cf3fb8aecbdea0008e9bacd9025e97c8",
- "size": 8409,
- "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_match_recognize-permute_/sql.yql"
+ "checksum": "319494095f46f445b1334166cb139412",
+ "size": 5237,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-permute_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-simple_paritioning-streaming]": [
{
- "checksum": "9971f2943b1f9fcf52dfdc36ceabeaf2",
- "size": 5067,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning-streaming_/sql.yql"
+ "checksum": "517bd077b852e7e35f5ef61b6ce5ab2e",
+ "size": 4882,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning-streaming_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-simple_paritioning]": [
{
- "checksum": "edf5680780dd393593d27f08aff839e2",
- "size": 5036,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning_/sql.yql"
+ "checksum": "07184284d8c30188d7345a28f0cb4c8d",
+ "size": 4851,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-simple_paritioning_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-test_type-streaming]": [
{
- "checksum": "1efff3745da1ff0c1c7a784d84b6e773",
- "size": 9899,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-test_type-streaming_/sql.yql"
+ "checksum": "26d2b2f5b48b4f537d82a9c70f2509e5",
+ "size": 9394,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-test_type-streaming_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-test_type]": [
{
- "checksum": "c3477e1413f59cd6ecff1b7b5b1f7cdc",
- "size": 9904,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_/sql.yql"
+ "checksum": "9782d694d28decb54e3194bddadcaf87",
+ "size": 9399,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_/sql.yql"
}
],
"test_sql2yql.test[match_recognize-test_type_predicate]": [
{
- "checksum": "1578ebf4c45fe97f2acf85bd83c5707d",
- "size": 3307,
- "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_predicate_/sql.yql"
+ "checksum": "bebea49359906f66cdfff88c13f467ec",
+ "size": 3221,
+ "uri": "https://{canondata_backend}/1920236/5e37b541c71c89b1b95dee0463a5a2e9bc5999f4/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_predicate_/sql.yql"
}
],
"test_sql2yql.test[optimizers-and_absorption]": [
@@ -10087,6 +10094,11 @@
"uri": "file://test_sql_format.test_match_recognize-greedy_quantifiers_/formatted.sql"
}
],
+ "test_sql_format.test[match_recognize-measures_aggregate]": [
+ {
+ "uri": "file://test_sql_format.test_match_recognize-measures_aggregate_/formatted.sql"
+ }
+ ],
"test_sql_format.test[match_recognize-permute]": [
{
"uri": "file://test_sql_format.test_match_recognize-permute_/formatted.sql"
@@ -10800,6 +10812,13 @@
"uri": "https://{canondata_backend}/1937429/434276f26b2857be3c5ad3fdbbf877d2bf775ac5/resource.tar.gz#test_sql_negative.test_action-parallel_for_values-default.txt_/err_file.out"
}
],
+ "test_sql_negative.test[aggr_factory-fail_on_match_recognize_specific-default.txt]": [
+ {
+ "checksum": "2eaa5b85aec56e50ab4209c4508ff432",
+ "size": 366,
+ "uri": "https://{canondata_backend}/1817427/6ce5bfb2e802fb52a4df6bc050f6fcc1af3305e1/resource.tar.gz#test_sql_negative.test_aggr_factory-fail_on_match_recognize_specific-default.txt_/err_file.out"
+ }
+ ],
"test_sql_negative.test[aggregate-fail_group_by_struct_member-default.txt]": [
{
"checksum": "18d827f5fa448873f5b2ad05b664be70",
@@ -10807,6 +10826,13 @@
"uri": "https://{canondata_backend}/1937429/434276f26b2857be3c5ad3fdbbf877d2bf775ac5/resource.tar.gz#test_sql_negative.test_aggregate-fail_group_by_struct_member-default.txt_/err_file.out"
}
],
+ "test_sql_negative.test[aggregate-fail_on_match_recognize_specific-default.txt]": [
+ {
+ "checksum": "d4b1dfac0c7ded90c1a932c0bca076a9",
+ "size": 252,
+ "uri": "https://{canondata_backend}/1817427/6ce5bfb2e802fb52a4df6bc050f6fcc1af3305e1/resource.tar.gz#test_sql_negative.test_aggregate-fail_on_match_recognize_specific-default.txt_/err_file.out"
+ }
+ ],
"test_sql_negative.test[aggregate-having_without_aggregation-default.txt]": [
{
"checksum": "59ad103860c119f608fcecc67fdf1973",
@@ -11024,6 +11050,13 @@
"uri": "https://{canondata_backend}/1937429/434276f26b2857be3c5ad3fdbbf877d2bf775ac5/resource.tar.gz#test_sql_negative.test_lambda-lambda_no_dollar_assign-default.txt_/err_file.out"
}
],
+ "test_sql_negative.test[match_recognize-measures_aggr_factory-default.txt]": [
+ {
+ "checksum": "e66a11807db413ca11443f09d81f13f7",
+ "size": 209,
+ "uri": "https://{canondata_backend}/1899731/ca0a5d07c2713bd45430516401913aa15dee4ba5/resource.tar.gz#test_sql_negative.test_match_recognize-measures_aggr_factory-default.txt_/err_file.out"
+ }
+ ],
"test_sql_negative.test[order_by-order_by_subquery-default.txt]": [
{
"checksum": "373a17f049d856da759cae5496b6c772",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_match_recognize-measures_aggregate_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_match_recognize-measures_aggregate_/formatted.sql
new file mode 100644
index 0000000000..3178e73e9c
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_match_recognize-measures_aggregate_/formatted.sql
@@ -0,0 +1,49 @@
+PRAGMA FeatureR010 = 'prototype';
+
+$input = (
+ SELECT
+ *
+ FROM
+ AS_TABLE([
+ <|time: 0, value: 1u, name: 'A'|>,
+ <|time: 100, value: 2u, name: 'A'|>,
+ <|time: 200, value: 3u, name: 'B'|>,
+ <|time: 300, value: 3u, name: 'B'|>,
+ <|time: 400, value: 4u, name: 'A'|>,
+ <|time: 500, value: 5u, name: 'A'|>,
+ ])
+);
+
+SELECT
+ *
+FROM
+ $input MATCH_RECOGNIZE (
+ ORDER BY
+ CAST(time AS Timestamp)
+ MEASURES
+ SUM(A.value + 1u + LENGTH(A.name)) AS aggr_expr,
+ FIRST(A.value) AS first_a,
+ LAST(A.value) AS last_a,
+ COUNT(A.value) AS count_a,
+ COUNT(DISTINCT A.value) AS count_distinct_a,
+ AGGREGATE_LIST(A.value) AS aggrlist_a,
+ AGGREGATE_LIST_DISTINCT(A.value) AS aggrlist_distinct_a,
+ FIRST(B.value) AS first_b,
+ LAST(B.value) AS last_b,
+ COUNT(B.value) AS count_b,
+ COUNT(DISTINCT B.value) AS count_distinct_b,
+ AGGREGATE_LIST(B.value) AS aggrlist_b,
+ AGGREGATE_LIST_DISTINCT(B.value) AS aggrlist_distinct_b,
+ FIRST(C.value) AS first_c,
+ LAST(C.value) AS last_c,
+ COUNT(C.value) AS count_c,
+ COUNT(DISTINCT C.value) AS count_distinct_c,
+ AGGREGATE_LIST(C.value) AS aggrlist_c,
+ AGGREGATE_LIST_DISTINCT(C.value) AS aggrlist_distinct_c
+ PATTERN (A * B C * B A *)
+ DEFINE
+ A AS A.name == 'A' AND COALESCE(FIRST(B.value + 1u + LENGTH(B.name)) == 5, TRUE),
+ B AS B.name == 'B' AND FIRST(A.value + 1u + LENGTH(A.name)) == 3,
+ C AS C.name == 'C'
+ )
+;
diff --git a/yql/essentials/tests/sql/suites/aggr_factory/fail_on_match_recognize_specific.sqlx b/yql/essentials/tests/sql/suites/aggr_factory/fail_on_match_recognize_specific.sqlx
new file mode 100644
index 0000000000..92059f0a23
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/aggr_factory/fail_on_match_recognize_specific.sqlx
@@ -0,0 +1,14 @@
+$input = SELECT * FROM AS_TABLE([
+ <|time: 0, value: 1u, name: "A"|>,
+ <|time: 100, value: 2u, name: "A"|>,
+ <|time: 200, value: 3u, name: "B"|>,
+ <|time: 300, value: 3u, name: "B"|>,
+ <|time: 400, value: 4u, name: "A"|>,
+ <|time: 500, value: 5u, name: "A"|>,
+]);
+
+SELECT
+ AGGREGATE_BY(value, AggregationFactory("first")) as first,
+ AGGREGATE_BY(value, AggregationFactory("last")) as last
+FROM $input
+GROUP BY name;
diff --git a/yql/essentials/tests/sql/suites/aggregate/fail_on_match_recognize_specific.sqlx b/yql/essentials/tests/sql/suites/aggregate/fail_on_match_recognize_specific.sqlx
new file mode 100644
index 0000000000..79b8bf01ac
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/aggregate/fail_on_match_recognize_specific.sqlx
@@ -0,0 +1,14 @@
+$input = SELECT * FROM AS_TABLE([
+ <|time: 0, value: 1u, name: "A"|>,
+ <|time: 100, value: 2u, name: "A"|>,
+ <|time: 200, value: 3u, name: "B"|>,
+ <|time: 300, value: 3u, name: "B"|>,
+ <|time: 400, value: 4u, name: "A"|>,
+ <|time: 500, value: 5u, name: "A"|>,
+]);
+
+SELECT
+ FIRST(value) as first,
+ LAST(value) as last
+FROM $input
+GROUP BY name;
diff --git a/yql/essentials/tests/sql/suites/match_recognize/measures_aggr_factory.sqlx b/yql/essentials/tests/sql/suites/match_recognize/measures_aggr_factory.sqlx
new file mode 100644
index 0000000000..7d74c6792f
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/match_recognize/measures_aggr_factory.sqlx
@@ -0,0 +1,14 @@
+PRAGMA FeatureR010="prototype";
+
+$input = SELECT * FROM AS_TABLE([
+ <|time: 0, value: 1u, name: "A"|>,
+]);
+
+SELECT * FROM $input MATCH_RECOGNIZE (
+ ORDER BY CAST(time AS Timestamp)
+ MEASURES
+ AGGREGATE_BY(A.value, AggregationFactory("sum")) AS aggr_factory_expr
+ PATTERN (A)
+ DEFINE
+ A AS A.name = "A"
+);
diff --git a/yql/essentials/tests/sql/suites/match_recognize/measures_aggregate.sql b/yql/essentials/tests/sql/suites/match_recognize/measures_aggregate.sql
new file mode 100644
index 0000000000..eec70ff8bc
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/match_recognize/measures_aggregate.sql
@@ -0,0 +1,39 @@
+PRAGMA FeatureR010="prototype";
+
+$input = SELECT * FROM AS_TABLE([
+ <|time: 0, value: 1u, name: "A"|>,
+ <|time: 100, value: 2u, name: "A"|>,
+ <|time: 200, value: 3u, name: "B"|>,
+ <|time: 300, value: 3u, name: "B"|>,
+ <|time: 400, value: 4u, name: "A"|>,
+ <|time: 500, value: 5u, name: "A"|>,
+]);
+
+SELECT * FROM $input MATCH_RECOGNIZE (
+ ORDER BY CAST(time AS Timestamp)
+ MEASURES
+ SUM(A.value + 1u + LENGTH(A.name)) AS aggr_expr,
+ FIRST(A.value) AS first_a,
+ LAST(A.value) AS last_a,
+ COUNT(A.value) AS count_a,
+ COUNT(DISTINCT A.value) AS count_distinct_a,
+ AGGREGATE_LIST(A.value) AS aggrlist_a,
+ AGGREGATE_LIST_DISTINCT(A.value) AS aggrlist_distinct_a,
+ FIRST(B.value) AS first_b,
+ LAST(B.value) AS last_b,
+ COUNT(B.value) AS count_b,
+ COUNT(DISTINCT B.value) AS count_distinct_b,
+ AGGREGATE_LIST(B.value) AS aggrlist_b,
+ AGGREGATE_LIST_DISTINCT(B.value) AS aggrlist_distinct_b,
+ FIRST(C.value) AS first_c,
+ LAST(C.value) AS last_c,
+ COUNT(C.value) AS count_c,
+ COUNT(DISTINCT C.value) AS count_distinct_c,
+ AGGREGATE_LIST(C.value) AS aggrlist_c,
+ AGGREGATE_LIST_DISTINCT(C.value) AS aggrlist_distinct_c
+ PATTERN (A* B C* B A*)
+ DEFINE
+ A AS A.name = "A" AND COALESCE(FIRST(B.value + 1u + LENGTH(B.name)) = 5, TRUE),
+ B AS B.name = "B" AND FIRST(A.value + 1u + LENGTH(A.name)) = 3,
+ C AS C.name = "C"
+);