diff options
author | aneporada <aneporada@ydb.tech> | 2022-08-19 12:27:19 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2022-08-19 12:27:19 +0300 |
commit | a239dbebde59cbb6f194e52f3adbc9db4d70bdeb (patch) | |
tree | 61fdd2ae31893e0fee1931041d014c88d6d98929 | |
parent | 3e1f3244d4da002acabb189676344871bd01ff3c (diff) | |
download | ydb-a239dbebde59cbb6f194e52f3adbc9db4d70bdeb.tar.gz |
[] Fix hadling of tuples in GROUP BY expressions
-rw-r--r-- | ydb/library/yql/sql/v1/sql.cpp | 33 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_ut.cpp | 30 |
2 files changed, 58 insertions, 5 deletions
diff --git a/ydb/library/yql/sql/v1/sql.cpp b/ydb/library/yql/sql/v1/sql.cpp index 1db7f5f41ab..509586f5738 100644 --- a/ydb/library/yql/sql/v1/sql.cpp +++ b/ydb/library/yql/sql/v1/sql.cpp @@ -927,6 +927,10 @@ public: SmartParenthesisMode = mode; } + void MarkAsNamed() { + MaybeUnnamedSmartParenOnTop = false; + } + TMaybe<TExprOrIdent> LiteralExpr(const TRule_literal_value& node); private: struct TTrailingQuestions { @@ -1002,6 +1006,7 @@ private: // // trailing QUESTIONS are used in optional simple types (String?) and optional lambda args: ($x, $y?) -> ($x) // ((double_question neq_subexpr) => double_question neq_subexpr | QUESTION+)?; YQL_ENSURE(tailExternal.Count == 0); + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock3(); TTrailingQuestions tail; if (node.HasBlock3() && node.GetBlock3().Alt_case() == TRule_neq_subexpr::TBlock3::kAlt2) { auto& questions = node.GetBlock3().GetAlt2(); @@ -1079,6 +1084,7 @@ private: TNodePtr SmartParenthesis(const TRule_smart_parenthesis& node); ESmartParenthesis SmartParenthesisMode = ESmartParenthesis::Default; + bool MaybeUnnamedSmartParenOnTop = true; THashMap<TString, TNodePtr> ExprShortcuts; }; @@ -1230,6 +1236,9 @@ TNodePtr TSqlTranslation::NamedExpr(const TRule_named_expr& node, EExpr exprMode } else if (exprMode == EExpr::SqlLambdaParams) { expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::SqlLambdaParams); } + if (node.HasBlock2()) { + expr.MarkAsNamed(); + } TNodePtr exprNode(expr.Build(node.GetRule_expr1())); if (!exprNode) { Ctx.IncrementMonCounter("sql_errors", "NamedExprInvalid"); @@ -3879,9 +3888,11 @@ TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node, const TTrailin UnexpectedQuestionToken(tail); return {}; } else { + MaybeUnnamedSmartParenOnTop = false; return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1()); } } else { + MaybeUnnamedSmartParenOnTop = false; if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) { return UnaryCasualExpr(node.GetAlt_in_unary_subexpr1().GetRule_in_unary_casual_subexpr1(), tail); } else if (tail.Count) { @@ -4288,12 +4299,14 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const const auto& suffix = node.GetRule_unary_subexpr_suffix2(); const bool suffixIsEmpty = suffix.GetBlock1().empty() && !suffix.HasBlock2(); + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && suffixIsEmpty; TString name; TNodePtr expr; bool typePossible = false; auto& block = node.GetBlock1(); switch (block.Alt_case()) { case TUnaryCasualExprRule::TBlock1::kAlt1: { + MaybeUnnamedSmartParenOnTop = false; auto& alt = block.GetAlt1(); if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { name = Id(alt.GetRule_id_expr1(), *this); @@ -4310,6 +4323,7 @@ TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { exprOrId = AtomExpr(alt.GetRule_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{}); } else { + MaybeUnnamedSmartParenOnTop = false; exprOrId = InAtomExpr(alt.GetRule_in_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{}); } @@ -4537,6 +4551,7 @@ TNodePtr TSqlExpression::LambdaRule(const TRule_lambda& rule) { return SmartParenthesis(alt.GetRule_smart_parenthesis1()); } + MaybeUnnamedSmartParenOnTop = false; TNodePtr parenthesis; { // we allow column reference here to postpone error and report it with better description in SqlLambdaParams @@ -4724,6 +4739,7 @@ TMaybe<TExprOrIdent> TSqlExpression::AtomExpr(const TRule_atom_expr& node, const UnexpectedQuestionToken(tail); return {}; } + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (node.Alt_case() == TRule_atom_expr::kAltAtomExpr3); TExprOrIdent result; switch (node.Alt_case()) { case TRule_atom_expr::kAltAtomExpr1: @@ -5013,6 +5029,7 @@ TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQ case TRule_con_subexpr::kAltConSubexpr1: return UnaryExpr(node.GetAlt_con_subexpr1().GetRule_unary_subexpr1(), tail); case TRule_con_subexpr::kAltConSubexpr2: { + MaybeUnnamedSmartParenOnTop = false; Ctx.IncrementMonCounter("sql_features", "UnaryOperation"); TString opName; auto token = node.GetAlt_con_subexpr2().GetRule_unary_op1().GetToken1(); @@ -5040,6 +5057,7 @@ TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQ TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail) { // xor_subexpr: eq_subexpr cond_expr?; + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock2(); TNodePtr res(SubExpr(node.GetRule_eq_subexpr1(), node.HasBlock2() ? TTrailingQuestions{} : tail)); if (!res) { return {}; @@ -5332,6 +5350,8 @@ TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetN if (begin == end) { return SubExpr(node, tail); } + // can't have top level smart_parenthesis node if any binary operation is present + MaybeUnnamedSmartParenOnTop = false; Ctx.IncrementMonCounter("sql_binary_operations", opName); const size_t listSize = end - begin; TVector<TNodePtr> nodes; @@ -5345,6 +5365,7 @@ TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetN template <typename TNode, typename TGetNode, typename TIter> TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end); TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{}); while (begin != end) { Ctx.IncrementMonCounter("sql_features", "BinaryOperation"); @@ -5407,6 +5428,7 @@ TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter be template <typename TGetNode, typename TIter> TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end); TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{}); while (begin != end) { Ctx.IncrementMonCounter("sql_features", "BinaryOperation"); @@ -5490,6 +5512,7 @@ TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNo template <typename TGetNode, typename TIter> TNodePtr TSqlExpression::BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end); TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{}); while (begin != end) { Ctx.IncrementMonCounter("sql_features", "BinaryOperation"); @@ -5578,6 +5601,8 @@ TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { return {}; } + bool topLevelGroupBy = MaybeUnnamedSmartParenOnTop && SmartParenthesisMode == ESmartParenthesis::GroupBy; + bool hasAliases = false; bool hasUnnamed = false; for (const auto& expr: exprs) { @@ -5586,7 +5611,7 @@ TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { } else { hasUnnamed = true; } - if (hasAliases && hasUnnamed && SmartParenthesisMode != ESmartParenthesis::GroupBy) { + if (hasAliases && hasUnnamed && !topLevelGroupBy) { Ctx.IncrementMonCounter("sql_errors", "AnonymousStructMembers"); Ctx.Error(pos) << "Structure does not allow anonymous members"; return nullptr; @@ -5595,11 +5620,11 @@ TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) { return exprs.back(); } - if (SmartParenthesisMode == ESmartParenthesis::GroupBy) { - /// \todo support nested tuple\struct + if (topLevelGroupBy) { if (isTuple) { Ctx.IncrementMonCounter("sql_errors", "SimpleTupleInGroupBy"); - Ctx.Error(pos) << "Unable to use tuple in group by clause"; + Token(node.GetBlock3().GetToken1()); + Ctx.Error() << "Unexpected trailing comma in grouping elements list"; return nullptr; } Ctx.IncrementMonCounter("sql_features", "ListOfNamedNode"); diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index dbe336b2616..36153f5016b 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -1668,6 +1668,34 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT(SqlToYql("select * from plato.T with columns = Struct<a:Int32, b:String>;").IsOk()); UNIT_ASSERT(SqlToYql("select * from plato.T with (format=csv_with_names, schema=(year Int32, month String, day String, a Utf8, b Uint16));").IsOk()); } + + Y_UNIT_TEST(AllowNestedTuplesInGroupBy) { + NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by 1 + (x, y, z);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"group0\")")); + }; + + TWordCountHive elementStat({"Aggregate"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["Aggregate"] == 1); + } + + Y_UNIT_TEST(AllowGroupByWithParens) { + NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by (x, y as alias1, z);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"x\" '\"alias1\" '\"z\")")); + }; + + TWordCountHive elementStat({"Aggregate"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["Aggregate"] == 1); + } } Y_UNIT_TEST_SUITE(ExternalFunction) { @@ -3229,7 +3257,7 @@ select FormatType($f()); Y_UNIT_TEST(ErrGroupBySmartParenAsTuple) { ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (k, v,)", - "<main>:1:36: Error: Unable to use tuple in group by clause\n"); + "<main>:1:41: Error: Unexpected trailing comma in grouping elements list\n"); } Y_UNIT_TEST(HandleNestedSmartParensInGroupBy) { |