diff options
author | vokayndzop <vokayndzop@yandex-team.com> | 2025-01-29 16:25:37 +0300 |
---|---|---|
committer | vokayndzop <vokayndzop@yandex-team.com> | 2025-01-29 17:18:25 +0300 |
commit | be07767ad39d693f1f8165c85adf05f925ff84bf (patch) | |
tree | 827003dadb10987e5aba89309eddb7902034ca31 /yql/essentials/sql/v1/sql_match_recognize.cpp | |
parent | 6a31c2b82c0136d2a116b7be4e155d1898d4e7eb (diff) | |
download | ydb-be07767ad39d693f1f8165c85adf05f925ff84bf.tar.gz |
MR: aggregation in MEASURES
commit_hash:5357736eed7a221ff5844d4351abe23e65930632
Diffstat (limited to 'yql/essentials/sql/v1/sql_match_recognize.cpp')
-rw-r--r-- | yql/essentials/sql/v1/sql_match_recognize.cpp | 662 |
1 files changed, 381 insertions, 281 deletions
diff --git a/yql/essentials/sql/v1/sql_match_recognize.cpp b/yql/essentials/sql/v1/sql_match_recognize.cpp index 41415b7f23..6fcd56f4d6 100644 --- a/yql/essentials/sql/v1/sql_match_recognize.cpp +++ b/yql/essentials/sql/v1/sql_match_recognize.cpp @@ -1,367 +1,467 @@ #include "sql_match_recognize.h" + #include "node.h" #include "sql_expression.h" + #include <yql/essentials/core/sql_types/match_recognize.h> -#include <algorithm> namespace NSQLTranslationV1 { -using namespace NSQLv1Generated; - -namespace { - -TPosition TokenPosition(const TToken& token){ - return TPosition{token.GetColumn(), token.GetLine()}; -} - -TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecognizeClause& ctx){ - return Id(node.GetRule_identifier1(), ctx); -} - -} //namespace +TSqlMatchRecognizeClause::TSqlMatchRecognizeClause(TContext& ctx, NSQLTranslation::ESqlMode mode) : TSqlTranslation(ctx, mode) {} TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) { - TPosition pos(matchRecognizeClause.GetToken1().GetColumn(), matchRecognizeClause.GetToken1().GetLine()); + auto pos = GetPos(matchRecognizeClause.GetToken1()); if (!Ctx.FeatureR010) { Ctx.Error(pos, TIssuesIds::CORE) << "Unexpected MATCH_RECOGNIZE"; return {}; } - TVector<TNamedFunction> partitioners; - TPosition partitionsPos = pos; - if (matchRecognizeClause.HasBlock3()) { - const auto& partitionClause = matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1(); - partitionsPos = TokenPosition(partitionClause.GetToken1()); - partitioners = ParsePartitionBy(partitionClause); - if (!partitioners) - return {}; - } - TVector<TSortSpecificationPtr> sortSpecs; - TPosition orderByPos = pos; - if (matchRecognizeClause.HasBlock4()) { - const auto& orderByClause = matchRecognizeClause.GetBlock4().GetRule_order_by_clause1(); - orderByPos = TokenPosition(orderByClause.GetToken1()); - if (!OrderByClause(orderByClause, sortSpecs)) { - return {}; - } - } - TPosition measuresPos = pos; - TVector<TNamedFunction> measures; - if (matchRecognizeClause.HasBlock5()) { - const auto& measuresClause = matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1(); - measuresPos = TokenPosition(measuresClause.GetToken1()); - measures = ParseMeasures(measuresClause.GetRule_row_pattern_measure_list2()); + auto [partitionKeySelector, partitionColumns] = ParsePartitionBy( + pos, + matchRecognizeClause.HasBlock3() + ? std::addressof(matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1()) + : nullptr + ); + + auto sortSpecs = ParseOrderBy( + matchRecognizeClause.HasBlock4() + ? std::addressof(matchRecognizeClause.GetBlock4().GetRule_order_by_clause1()) + : nullptr + ); + if (!sortSpecs) { + return {}; } - auto rowsPerMatch = std::pair {pos, NYql::NMatchRecognize::ERowsPerMatch::OneRow}; - if (matchRecognizeClause.HasBlock6()) { - rowsPerMatch = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1()); + auto measures = ParseMeasures( + matchRecognizeClause.HasBlock5() + ? std::addressof(matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1().GetRule_row_pattern_measure_list2()) + : nullptr + ); + + auto rowsPerMatch = ParseRowsPerMatch( + pos, + matchRecognizeClause.HasBlock6() + ? std::addressof(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1()) + : nullptr + ); + if (!rowsPerMatch) { + return {}; } const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7(); - if (commonSyntax.HasBlock2()) { const auto& initialOrSeek = commonSyntax.GetBlock2().GetRule_row_pattern_initial_or_seek1(); - Ctx.Error(TokenPosition(initialOrSeek.GetToken1())) << "InitialOrSeek subclause is not allowed in FROM clause"; + Ctx.Error(GetPos(initialOrSeek.GetToken1())) << "InitialOrSeek subclause is not allowed in FROM clause"; return {}; } - auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5()); - const auto& patternPos = TokenPosition(commonSyntax.token3()); - - //this block is located before pattern block in grammar, - // but depends on it, so it is processed after pattern block - std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> skipTo { - pos, - NYql::NMatchRecognize::TAfterMatchSkipTo{ - NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, - TString() - } - }; - if (commonSyntax.HasBlock1()){ - skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3()); - const auto varRequired = - NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst == skipTo.second.To || - NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast == skipTo.second.To || - NYql::NMatchRecognize::EAfterMatchSkipTo::To == skipTo.second.To; - if (varRequired) { - const auto& allVars = NYql::NMatchRecognize::GetPatternVars(pattern); - if (allVars.find(skipTo.second.Var) == allVars.cend()) { - Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH"; - return {}; - } - } + PatternVarNames.clear(); + PatternVars = BuildList(pos); + auto pattern = ParsePattern(pos, commonSyntax.GetRule_row_pattern5(), 0, true); + if (!pattern) { + return {}; } + auto skipTo = ParseAfterMatchSkipTo( + pos, + commonSyntax.HasBlock1() + ? std::addressof(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3()) + : nullptr + ); + if (!skipTo) { + return {}; + } - TNodePtr subset; - TPosition subsetPos = pos; - if (commonSyntax.HasBlock7()) { - const auto& rowPatternSubset = commonSyntax.GetBlock7().GetRule_row_pattern_subset_clause1(); - subsetPos = TokenPosition(rowPatternSubset.GetToken1()); - Ctx.Error() << "SUBSET is not implemented yet"; - //TODO https://st.yandex-team.ru/YQL-16225 + auto subset = ParseSubset( + pos, + commonSyntax.HasBlock7() + ? std::addressof(commonSyntax.GetBlock7().GetRule_row_pattern_subset_clause1()) + : nullptr + ); + if (!subset) { return {}; } - const auto& definitions = ParseDefinitions(commonSyntax.GetRule_row_pattern_definition_list9()); - const auto& definitionsPos = TokenPosition(commonSyntax.GetToken8()); - const auto& rowPatternVariables = GetPatternVars(pattern); + auto definitions = ParseDefinitions(commonSyntax.GetRule_row_pattern_definition_list9()); for (const auto& [callable, name]: definitions) { - if (!rowPatternVariables.contains(name)) { + if (!PatternVarNames.contains(name)) { Ctx.Error(callable->GetPos()) << "ROW PATTERN VARIABLE " << name << " is defined, but not mentioned in the PATTERN"; return {}; } } - return new TMatchRecognizeBuilder{ + return new TMatchRecognizeBuilder( pos, - std::pair{partitionsPos, std::move(partitioners)}, - std::pair{orderByPos, std::move(sortSpecs)}, - std::pair{measuresPos, measures}, + std::move(partitionKeySelector), + std::move(partitionColumns), + std::move(*sortSpecs), + std::move(measures), std::move(rowsPerMatch), std::move(skipTo), - std::pair{patternPos, std::move(pattern)}, - std::pair{subsetPos, std::move(subset)}, - std::pair{definitionsPos, std::move(definitions)} - }; - + std::move(pattern), + std::move(PatternVars), + std::move(*subset), + std::move(definitions) + ); +} +std::tuple<TNodePtr, TNodePtr> TSqlMatchRecognizeClause::ParsePartitionBy(TPosition pos, const TRule_window_partition_clause* node) { + auto [partitionKeySelector, partitionColumns] = [&]() -> std::tuple<TNodePtr, TNodePtr> { + auto partitionKeySelector = BuildList(pos); + auto partitionColumns = BuildList(pos); + if (!node) { + return {partitionKeySelector, partitionColumns}; + } + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TVector<TNodePtr> partitionExprs; + if (!NamedExprList(node->GetRule_named_expr_list4(), partitionExprs)) { + return {partitionKeySelector, partitionColumns}; + } + for (const auto& p : partitionExprs) { + auto label = p->GetLabel(); + if (!label && p->GetColumnName()) { + label = *p->GetColumnName(); + } + partitionKeySelector->Add(p); + partitionColumns->Add(BuildQuotedAtom(p->GetPos(), label)); + } + return {partitionKeySelector, partitionColumns}; + }(); + return { + BuildLambda(pos, BuildList(pos, {BuildAtom(pos, "row")}), BuildQuote(pos, std::move(partitionKeySelector))), + BuildQuote(pos, std::move(partitionColumns)) + }; } -TVector<TNamedFunction> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_window_partition_clause& partitionClause) { - TColumnRefScope scope(Ctx, EColumnRefState::Allow); - TVector<TNodePtr> partitionExprs; - if (!NamedExprList( - partitionClause.GetRule_named_expr_list4(), - partitionExprs)) { - return {}; +TMaybe<TVector<TSortSpecificationPtr>> TSqlMatchRecognizeClause::ParseOrderBy(const TRule_order_by_clause* node) { + if (!node) { + return TVector<TSortSpecificationPtr>{}; } - TVector<TNamedFunction> partitioners; - for (const auto& p: partitionExprs) { - auto label = p->GetLabel(); - if (!label && p->GetColumnName()) { - label = *p->GetColumnName(); - } - partitioners.push_back(TNamedFunction{p, label}); + TVector<TSortSpecificationPtr> result; + if (!OrderByClause(*node, result)) { + return {}; } - return partitioners; + return result; } TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_measure_definition& node) { - TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize); - const auto& expr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1()); - const auto& name = Id(node.GetRule_an_id3(), *this); - //Each measure must be a lambda, that accepts 2 args: + TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognizeMeasures); + auto callable = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1()); + auto measureName = Id(node.GetRule_an_id3(), *this); + // Each measure must be a lambda, that accepts 2 args: // - List<InputTableColumns + _yql_Classifier, _yql_MatchNumber> // - Struct that maps row pattern variables to ranges in the queue - return {expr, name}; + return {std::move(callable), std::move(measureName)}; } -TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list& node) { - TVector<TNamedFunction> result{ ParseOneMeasure(node.GetRule_row_pattern_measure_definition1()) }; - for (const auto& m: node.GetBlock2()) { +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list* node) { + if (!node) { + return {}; + } + TVector<TNamedFunction> result{ParseOneMeasure(node->GetRule_row_pattern_measure_definition1())}; + for (const auto& m: node->GetBlock2()) { result.push_back(ParseOneMeasure(m.GetRule_row_pattern_measure_definition2())); } return result; } -std::pair<TPosition, NYql::NMatchRecognize::ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) { - - switch(rowsPerMatchClause.GetAltCase()) { - case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1: - return std::pair { - TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match1().GetToken1()), - NYql::NMatchRecognize::ERowsPerMatch::OneRow - }; - case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2: - return std::pair { - TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match2().GetToken1()), - NYql::NMatchRecognize::ERowsPerMatch::AllRows - }; +TNodePtr TSqlMatchRecognizeClause::ParseRowsPerMatch(TPosition pos, const TRule_row_pattern_rows_per_match* node) { + const auto result = [&]() -> NYql::NMatchRecognize::ERowsPerMatch { + if (!node) { + return NYql::NMatchRecognize::ERowsPerMatch::OneRow; + } + switch (node->GetAltCase()) { + case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1: { + const auto& rowsPerMatch = node->GetAlt_row_pattern_rows_per_match1(); + pos = GetPos(rowsPerMatch.GetToken1()); + return NYql::NMatchRecognize::ERowsPerMatch::OneRow; + } + case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2: { + const auto& rowsPerMatch = node->GetAlt_row_pattern_rows_per_match2(); + pos = GetPos(rowsPerMatch.GetToken1()); + return NYql::NMatchRecognize::ERowsPerMatch::AllRows; + } case TRule_row_pattern_rows_per_match::ALT_NOT_SET: Y_ABORT("You should change implementation according to grammar changes"); - } + } + }(); + return BuildQuotedAtom(pos, "RowsPerMatch_" + ToString(result)); } -std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause) { - switch (skipToClause.GetAltCase()) { - case TRule_row_pattern_skip_to::kAltRowPatternSkipTo1: - return std::pair{ - TokenPosition(skipToClause.GetAlt_row_pattern_skip_to1().GetToken1()), - NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::NextRow, ""} - }; - case TRule_row_pattern_skip_to::kAltRowPatternSkipTo2: - return std::pair{ - TokenPosition(skipToClause.GetAlt_row_pattern_skip_to2().GetToken1()), - NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""} - }; - case TRule_row_pattern_skip_to::kAltRowPatternSkipTo3: - return std::pair{ - TokenPosition(skipToClause.GetAlt_row_pattern_skip_to3().GetToken1()), - NYql::NMatchRecognize::TAfterMatchSkipTo{ - NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst, - skipToClause.GetAlt_row_pattern_skip_to3().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() - } - }; - case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4: - return std::pair{ - TokenPosition(skipToClause.GetAlt_row_pattern_skip_to4().GetToken1()), - NYql::NMatchRecognize::TAfterMatchSkipTo{ - NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast, - skipToClause.GetAlt_row_pattern_skip_to4().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() - } - }; - case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5: - return std::pair{ - TokenPosition(skipToClause.GetAlt_row_pattern_skip_to5().GetToken1()), - NYql::NMatchRecognize::TAfterMatchSkipTo{ - NYql::NMatchRecognize::EAfterMatchSkipTo::To, - skipToClause.GetAlt_row_pattern_skip_to5().GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() - } - }; +TNodePtr TSqlMatchRecognizeClause::ParseAfterMatchSkipTo(TPosition pos, const TRule_row_pattern_skip_to* node) { + auto skipToPos = pos; + auto varPos = pos; + const auto result = [&]() -> TMaybe<NYql::NMatchRecognize::TAfterMatchSkipTo> { + if (!node) { + return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""}; + } + switch (node->GetAltCase()) { + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo1: { + const auto& skipTo = node->GetAlt_row_pattern_skip_to1(); + skipToPos = GetPos(skipTo.GetToken1()); + return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::NextRow, ""}; + } + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo2: { + const auto& skipTo = node->GetAlt_row_pattern_skip_to2(); + skipToPos = GetPos(skipTo.GetToken1()); + return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""}; + } + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo3: { + const auto& skipTo = node->GetAlt_row_pattern_skip_to3(); + skipToPos = GetPos(skipTo.GetToken1()); + const auto& identifier = skipTo.GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1(); + auto var = identifier.GetToken1().GetValue(); + varPos = GetPos(identifier.GetToken1()); + if (!PatternVarNames.contains(var)) { + Ctx.Error(varPos) << "Unknown pattern variable in AFTER MATCH SKIP TO FIRST"; + return {}; + } + return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst, std::move(var)}; + } + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4: { + const auto& skipTo = node->GetAlt_row_pattern_skip_to4(); + skipToPos = GetPos(skipTo.GetToken1()); + const auto& identifier = skipTo.GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1(); + auto var = identifier.GetToken1().GetValue(); + varPos = GetPos(identifier.GetToken1()); + if (!PatternVarNames.contains(var)) { + Ctx.Error(varPos) << "Unknown pattern variable in AFTER MATCH SKIP TO LAST"; + return {}; + } + return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast, std::move(var)}; + } + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5: { + const auto& skipTo = node->GetAlt_row_pattern_skip_to5(); + skipToPos = GetPos(skipTo.GetToken1()); + const auto& identifier = skipTo.GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1(); + auto var = identifier.GetToken1().GetValue(); + varPos = GetPos(identifier.GetToken1()); + if (!PatternVarNames.contains(var)) { + Ctx.Error(varPos) << "Unknown pattern variable in AFTER MATCH SKIP TO"; + return {}; + } + return NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::To, std::move(var)}; + } case TRule_row_pattern_skip_to::ALT_NOT_SET: Y_ABORT("You should change implementation according to grammar changes"); + } + }(); + if (!result) { + return {}; } + return BuildTuple(pos, { + BuildQuotedAtom(skipToPos, "AfterMatchSkip_" + ToString(result->To)), + BuildQuotedAtom(varPos, std::move(result->Var)) + }); } -NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node, size_t patternNestingLevel, bool outputArg) { - NYql::NMatchRecognize::TRowPatternTerm term; - TPosition pos; - for (const auto& factor: node.GetBlock1()) { - const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1(); - NYql::NMatchRecognize::TRowPatternPrimary primary; - bool output = outputArg; - switch (primaryVar.GetAltCase()) { - case TRule_row_pattern_primary::kAltRowPatternPrimary1: - primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this); - break; - case TRule_row_pattern_primary::kAltRowPatternPrimary2: - primary = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue(); - Y_ENSURE("$" == std::get<0>(primary)); - break; - case TRule_row_pattern_primary::kAltRowPatternPrimary3: - primary = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue(); - Y_ENSURE("^" == std::get<0>(primary)); - break; - case TRule_row_pattern_primary::kAltRowPatternPrimary4: { - if (patternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) { - primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1(), patternNestingLevel + 1, output); - } else { - Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) - << "To big nesting level in the pattern"; - return NYql::NMatchRecognize::TRowPatternTerm{}; - } - break; +TNodePtr TSqlMatchRecognizeClause::BuildPatternFactor(TPosition pos, TNodePtr primary, std::tuple<ui64, ui64, bool, bool, bool> quantifier) { + return std::apply([&](const auto& ...args) { + return BuildTuple(pos, {std::move(primary), BuildQuotedAtom(pos, ToString(args))...}); + }, quantifier); +} + +TNodePtr TSqlMatchRecognizeClause::ParsePatternFactor(TPosition pos, const TRule_row_pattern_factor& node, size_t nestingLevel, bool output) { + if (nestingLevel > MaxPatternNesting) { + Ctx.Error(pos) << "To big nesting level in the pattern"; + return {}; + } + auto primary = [&]() -> TNodePtr { + const auto& primaryAlt = node.GetRule_row_pattern_primary1(); + switch (primaryAlt.GetAltCase()) { + case TRule_row_pattern_primary::kAltRowPatternPrimary1: { + const auto& primary = primaryAlt.GetAlt_row_pattern_primary1(); + const auto& identifier = primary.GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1().GetRule_identifier1(); + const auto varName = Id(identifier, *this); + const auto var = BuildQuotedAtom(GetPos(identifier.GetToken1()), varName); + if (PatternVarNames.insert(varName).second) { + PatternVars->Add(var); } - case TRule_row_pattern_primary::kAltRowPatternPrimary5: - output = false; - primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary5().GetRule_row_pattern3(), patternNestingLevel + 1, output); - break; - case TRule_row_pattern_primary::kAltRowPatternPrimary6: { - std::vector<NYql::NMatchRecognize::TRowPatternPrimary> items{ParsePattern( - primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3(), patternNestingLevel + 1, output) - }; - for (const auto& p: primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetBlock4()) { - items.push_back(ParsePattern(p.GetRule_row_pattern2(), patternNestingLevel + 1, output)); - } - //Permutations now is a syntactic sugar and converted to all possible alternatives - if (items.size() > NYql::NMatchRecognize::MaxPermutedItems) { - Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) - << "Too many items in permute"; - return NYql::NMatchRecognize::TRowPatternTerm{}; - } - std::vector<size_t> indexes(items.size()); - std::generate(begin(indexes), end(indexes), [n = 0] () mutable { return n++; }); - NYql::NMatchRecognize::TRowPattern permuted; - do { - NYql::NMatchRecognize::TRowPatternTerm term; - term.reserve(indexes.size()); - for (size_t i = 0; i != indexes.size(); ++i) { - term.push_back({items[indexes[i]], 1, 1, true, false, false}); - } - permuted.push_back(std::move(term)); - } while (std::next_permutation(indexes.begin(), indexes.end())); - primary = permuted; - break; + return var; + } + case TRule_row_pattern_primary::kAltRowPatternPrimary2: { + const auto& primary = primaryAlt.GetAlt_row_pattern_primary2(); + const auto& token = primary.GetToken1(); + const auto varName = token.GetValue(); + const auto var = BuildQuotedAtom(GetPos(token), varName); + if (PatternVarNames.insert(varName).second) { + PatternVars->Add(var); } - case TRule_row_pattern_primary::ALT_NOT_SET: - Y_ABORT("You should change implementation according to grammar changes"); + return var; } - uint64_t quantityMin = 1; - uint64_t quantityMax = 1; - constexpr uint64_t infinity = std::numeric_limits<uint64_t>::max(); - bool greedy = true; - if (factor.GetRule_row_pattern_factor1().HasBlock2()) { - const auto& quantifier = factor.GetRule_row_pattern_factor1().GetBlock2().GetRule_row_pattern_quantifier1(); - switch(quantifier.GetAltCase()){ - case TRule_row_pattern_quantifier::kAltRowPatternQuantifier1: //* - quantityMin = 0; - quantityMax = infinity; - greedy = !quantifier.GetAlt_row_pattern_quantifier1().HasBlock2(); - break; - case TRule_row_pattern_quantifier::kAltRowPatternQuantifier2: //+ - quantityMax = infinity; - greedy = !quantifier.GetAlt_row_pattern_quantifier2().HasBlock2(); - break; - case TRule_row_pattern_quantifier::kAltRowPatternQuantifier3: //? - quantityMin = 0; - greedy = !quantifier.GetAlt_row_pattern_quantifier3().HasBlock2(); - break; - case TRule_row_pattern_quantifier::kAltRowPatternQuantifier4: //{ 2?, 4?} - if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock2()) { - quantityMin = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock2().GetRule_integer1().GetToken1().GetValue()); - } - else { - quantityMin = 0;; - } - if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock4()) { - quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock4().GetRule_integer1().GetToken1().GetValue()); - } - else { - quantityMax = infinity; - } - greedy = !quantifier.GetAlt_row_pattern_quantifier4().HasBlock6(); - - break; - case TRule_row_pattern_quantifier::kAltRowPatternQuantifier5: - quantityMin = quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier5().GetRule_integer2().GetToken1().GetValue()); - break; - case TRule_row_pattern_quantifier::ALT_NOT_SET: - Y_ABORT("You should change implementation according to grammar changes"); + case TRule_row_pattern_primary::kAltRowPatternPrimary3: { + const auto& primary = primaryAlt.GetAlt_row_pattern_primary3(); + const auto& token = primary.GetToken1(); + const auto varName = token.GetValue(); + const auto var = BuildQuotedAtom(GetPos(token), varName); + if (PatternVarNames.insert(varName).second) { + PatternVars->Add(var); } + return var; + } + case TRule_row_pattern_primary::kAltRowPatternPrimary4: { + const auto& primary = primaryAlt.GetAlt_row_pattern_primary4(); + return ParsePattern(pos, primary.GetBlock2().GetRule_row_pattern1(), nestingLevel + 1, output); + } + case TRule_row_pattern_primary::kAltRowPatternPrimary5: { + const auto& primary = primaryAlt.GetAlt_row_pattern_primary5(); + output = false; + return ParsePattern(pos, primary.GetRule_row_pattern3(), nestingLevel + 1, output); + } + case TRule_row_pattern_primary::kAltRowPatternPrimary6: { + const auto& primary = primaryAlt.GetAlt_row_pattern_primary6(); + std::vector<TNodePtr> items{ + ParsePattern(pos, primary.GetRule_row_pattern_permute1().GetRule_row_pattern3(), nestingLevel + 1, output) + }; + for (const auto& p: primary.GetRule_row_pattern_permute1().GetBlock4()) { + items.push_back(ParsePattern(pos, p.GetRule_row_pattern2(), nestingLevel + 1, output)); + } + if (items.size() > MaxPermutedItems) { + Ctx.Error(GetPos(primary.GetRule_row_pattern_permute1().GetToken1())) << "Too many items in permute"; + return {}; + } + std::vector<size_t> indexes(items.size()); + Iota(indexes.begin(), indexes.end(), 0); + std::vector<TNodePtr> result; + do { + std::vector<TNodePtr> term; + term.reserve(items.size()); + for (auto index : indexes) { + term.push_back(BuildPatternFactor(pos, items[index], std::tuple{1, 1, true, output, false})); + } + result.push_back(BuildPatternTerm(pos, std::move(term))); + } while (std::next_permutation(indexes.begin(), indexes.end())); + return BuildPattern(pos, std::move(result)); + } + case TRule_row_pattern_primary::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); } - term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output, false}); + }(); + if (!primary) { + return {}; } - return term; + + const auto quantifier = [&]() { + if (!node.HasBlock2()) { + const auto quantity = static_cast<ui64>(1); + return std::tuple{quantity, quantity, true, output, false}; + } + const auto& quantifierAlt = node.GetBlock2().GetRule_row_pattern_quantifier1(); + switch (quantifierAlt.GetAltCase()) { + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier1: { // * + const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier1(); + pos = GetPos(quantifier.GetToken1()); + return std::tuple{static_cast<ui64>(0), static_cast<ui64>(Max()), !quantifier.HasBlock2(), output, false}; + } + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier2: { // + + const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier2(); + pos = GetPos(quantifier.GetToken1()); + return std::tuple{static_cast<ui64>(1), static_cast<ui64>(Max()), !quantifier.HasBlock2(), output, false}; + } + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier3: { // ? + const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier3(); + pos = GetPos(quantifier.GetToken1()); + return std::tuple{static_cast<ui64>(0), static_cast<ui64>(1), !quantifier.HasBlock2(), output, false}; + } + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier4: { // {n?, m?} + const auto& quantifier = quantifierAlt.GetAlt_row_pattern_quantifier4(); + pos = GetPos(quantifier.GetToken1()); + return std::tuple{ + quantifier.HasBlock2() + ? FromString(quantifier.GetBlock2().GetRule_integer1().GetToken1().GetValue()) + : static_cast<ui64>(0), + quantifier.HasBlock4() + ? FromString(quantifier.GetBlock4().GetRule_integer1().GetToken1().GetValue()) + : static_cast<ui64>(Max()), + !quantifier.HasBlock6(), + output, + false + }; + } + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier5: { // {n} + const auto quantifier = quantifierAlt.GetAlt_row_pattern_quantifier5(); + pos = GetPos(quantifier.GetToken1()); + const auto quantity = static_cast<ui64>(FromString(quantifier.GetRule_integer2().GetToken1().GetValue())); + return std::tuple{quantity, quantity, true, output, false}; + } + case TRule_row_pattern_quantifier::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + }(); + return BuildPatternFactor(pos, std::move(primary), std::move(quantifier)); } -NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node, size_t patternNestingLevel, bool output){ - TVector<NYql::NMatchRecognize::TRowPatternTerm> result; - result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1(), patternNestingLevel, output)); - for (const auto& term: node.GetBlock2()) - result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2(), patternNestingLevel, output)); +TNodePtr TSqlMatchRecognizeClause::BuildPatternTerm(TPosition pos, std::vector<TNodePtr> term) { + auto result = BuildList(pos); + for (auto& factor : term) { + if (!factor) { + return {}; + } + result->Add(std::move(factor)); + } + return BuildQuote(pos, std::move(result)); +} + +TNodePtr TSqlMatchRecognizeClause::ParsePatternTerm(TPosition pos, const TRule_row_pattern_term& node, size_t nestingLevel, bool output) { + std::vector<TNodePtr> result; + result.reserve(node.GetBlock1().size()); + for (const auto& factor: node.GetBlock1()) { + result.push_back(ParsePatternFactor(pos, factor.GetRule_row_pattern_factor1(), nestingLevel, output)); + } + return BuildPatternTerm(pos, std::move(result)); +} + +TNodePtr TSqlMatchRecognizeClause::BuildPattern(TPosition pos, std::vector<TNodePtr> pattern) { + const auto result = BuildList(pos, {BuildAtom(pos, "MatchRecognizePattern")}); + for (auto& term: pattern) { + if (!term) { + return {}; + } + result->Add(std::move(term)); + } return result; } -TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){ - const auto& varName = PatternVar(node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1(), *this); - TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize, true, varName); - const auto& searchCondition = TSqlExpression(Ctx, Mode).Build(node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1()); - return TNamedFunction{searchCondition, varName}; +TNodePtr TSqlMatchRecognizeClause::ParsePattern(TPosition pos, const TRule_row_pattern& node, size_t nestingLevel, bool output) { + std::vector<TNodePtr> result; + result.reserve(1 + node.GetBlock2().size()); + result.push_back(ParsePatternTerm(pos, node.GetRule_row_pattern_term1(), nestingLevel, output)); + for (const auto& term: node.GetBlock2()) { + result.push_back(ParsePatternTerm(pos, term.GetRule_row_pattern_term2(), nestingLevel, output)); + } + return BuildPattern(pos, std::move(result)); +} + +TMaybe<TNodePtr> TSqlMatchRecognizeClause::ParseSubset(TPosition pos, const TRule_row_pattern_subset_clause* node) { + if (!node) { + return TNodePtr{}; + } + pos = GetPos(node->GetToken1()); + // TODO https://st.yandex-team.ru/YQL-16225 + Ctx.Error(pos) << "SUBSET is not implemented yet"; + return {}; +} + +TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node) { + const auto& identifier = node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1().GetRule_identifier1(); + auto defineName = Id(identifier, *this); + TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognizeDefine, true, defineName); + const auto& searchCondition = node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1(); + auto callable = TSqlExpression(Ctx, Mode).Build(searchCondition); + // Each define must be a predicate lambda, that accepts 3 args: + // - List<input table rows> + // - A struct that maps row pattern variables to ranges in the queue + // - An index of the current row + return {std::move(callable), std::move(defineName)}; } TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) { - TVector<TNamedFunction> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())}; + TVector<TNamedFunction> result{ParseOneDefinition(node.GetRule_row_pattern_definition1())}; for (const auto& d: node.GetBlock2()) { - //Each define must be a predicate lambda, that accepts 3 args: - // - List<input table rows> - // - A struct that maps row pattern variables to ranges in the queue - // - An index of the current row result.push_back(ParseOneDefinition(d.GetRule_row_pattern_definition2())); } return result; } -} //namespace NSQLTranslationV1 +} // namespace NSQLTranslationV1 |