diff options
| author | vitya-smirnov <[email protected]> | 2025-10-01 20:51:41 +0300 |
|---|---|---|
| committer | vitya-smirnov <[email protected]> | 2025-10-01 21:25:50 +0300 |
| commit | f3f7b33a285e94cb3e27aaa4d9b23b543ece4106 (patch) | |
| tree | 89740eeddb803123f1ed3d6b6f7be2bcd865f68c /yql/essentials/sql/v1/sql_expression.cpp | |
| parent | 5cfb01d33937d2b2e038abf3b50e04136dcf7496 (diff) | |
YQL-20307: Support inline subqueries
- Alter grammar to support inline subqueries.
- Support inline subqueries in `sql/v1` (translator).
- Introduce `sql/v1/proto_parser/parse_tree.h` for reusable parse tree predicates.
- Support inline subqueries in `sql/v1/format`.
- Support inline subqueries in `sql/v1/complete`.
- Add some SQL-tests.
- Pass all tests.
commit_hash:075b2240778d071e1c7542f912d3cc83019ef849
Diffstat (limited to 'yql/essentials/sql/v1/sql_expression.cpp')
| -rw-r--r-- | yql/essentials/sql/v1/sql_expression.cpp | 201 |
1 file changed, 161 insertions, 40 deletions
diff --git a/yql/essentials/sql/v1/sql_expression.cpp b/yql/essentials/sql/v1/sql_expression.cpp index 4451e58a9b6..79d7c7c66b7 100644 --- a/yql/essentials/sql/v1/sql_expression.cpp +++ b/yql/essentials/sql/v1/sql_expression.cpp @@ -2,10 +2,12 @@ #include "sql_call_expr.h" #include "sql_select.h" #include "sql_values.h" +#include <yql/essentials/sql/v1/proto_parser/parse_tree.h> #include <yql/essentials/utils/utf8.h> #include <util/charset/wide.h> #include <util/string/ascii.h> #include <util/string/hex.h> +#include <util/generic/scope.h> #include "antlr_token.h" namespace NSQLTranslationV1 { @@ -15,7 +17,7 @@ using NALPDefaultAntlr4::SQLv1Antlr4Lexer; using namespace NSQLv1Generated; -TNodePtr TSqlExpression::Build(const TRule_expr& node) { +TNodePtr TSqlExpression::BuildSourceOrNode(const TRule_expr& node) { // expr: // or_subexpr (OR or_subexpr)* // | type_name_composite @@ -33,6 +35,16 @@ TNodePtr TSqlExpression::Build(const TRule_expr& node) { } } +TNodePtr TSqlExpression::Build(const TRule_expr& node) { + const bool prevIsSourceAllowed = IsSourceAllowed_; + Y_DEFER { + IsSourceAllowed_ = prevIsSourceAllowed; + }; + + IsSourceAllowed_ = false; + return BuildSourceOrNode(node); +} + TNodePtr TSqlExpression::Build(const TRule_lambda_or_parameter& node) { // lambda_or_parameter: // lambda @@ -58,6 +70,24 @@ TNodePtr TSqlExpression::Build(const TRule_lambda_or_parameter& node) { } } +TSourcePtr TSqlExpression::BuildSource(const TRule_select_or_expr& node) { + TNodePtr result = SelectOrExpr(node); + if (!result) { + return nullptr; + } + + if (TSourcePtr source = MoveOutIfSource(result)) { + return source; + } + + Ctx_.Error(result->GetPos()) << "Expected SELECT/PROCESS/REDUCE statement"; + return nullptr; +} + +TNodePtr TSqlExpression::BuildSourceOrNode(const TRule_smart_parenthesis& node) { + return SmartParenthesis(node); +} + TNodePtr TSqlExpression::SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail) { // mul_subexpr: con_subexpr 
(DOUBLE_PIPE con_subexpr)*; auto getNode = [](const TRule_mul_subexpr::TBlock2& b) -> const TRule_con_subexpr& { return b.GetRule_con_subexpr2(); }; @@ -1520,40 +1550,20 @@ TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, break; } case TRule_in_atom_expr::kAltInAtomExpr7: { - Token(node.GetAlt_in_atom_expr7().GetToken1()); - // reset column reference scope (select will reenable it where needed) - TColumnRefScope scope(Ctx_, EColumnRefState::Deny); - TSqlSelect select(Ctx_, Mode_); - TPosition pos; - auto source = select.Build(node.GetAlt_in_atom_expr7().GetRule_select_stmt2(), pos); - if (!source) { - Ctx_.IncrementMonCounter("sql_errors", "BadSource"); - return {}; - } - Ctx_.IncrementMonCounter("sql_features", "InSubquery"); - const auto alias = Ctx_.MakeName("subquerynode"); - const auto ref = Ctx_.MakeName("subquery"); - auto& blocks = Ctx_.GetCurrentBlocks(); - blocks.push_back(BuildSubquery(std::move(source), alias, Mode_ == NSQLTranslation::ESqlMode::SUBQUERY, -1, Ctx_.Scoped)); - blocks.back()->SetLabel(ref); - result.Expr = BuildSubqueryRef(blocks.back(), ref, -1); + result.Expr = ValueConstructor(node.GetAlt_in_atom_expr7().GetRule_value_constructor1()); break; } - case TRule_in_atom_expr::kAltInAtomExpr8: { - result.Expr = ValueConstructor(node.GetAlt_in_atom_expr8().GetRule_value_constructor1()); + case TRule_in_atom_expr::kAltInAtomExpr8: + result.Expr = BitCastRule(node.GetAlt_in_atom_expr8().GetRule_bitcast_expr1()); break; - } case TRule_in_atom_expr::kAltInAtomExpr9: - result.Expr = BitCastRule(node.GetAlt_in_atom_expr9().GetRule_bitcast_expr1()); + result.Expr = ListLiteral(node.GetAlt_in_atom_expr9().GetRule_list_literal1()); break; case TRule_in_atom_expr::kAltInAtomExpr10: - result.Expr = ListLiteral(node.GetAlt_in_atom_expr10().GetRule_list_literal1()); + result.Expr = DictLiteral(node.GetAlt_in_atom_expr10().GetRule_dict_literal1()); break; case TRule_in_atom_expr::kAltInAtomExpr11: - result.Expr = 
DictLiteral(node.GetAlt_in_atom_expr11().GetRule_dict_literal1()); - break; - case TRule_in_atom_expr::kAltInAtomExpr12: - result.Expr = StructLiteral(node.GetAlt_in_atom_expr12().GetRule_struct_literal1()); + result.Expr = StructLiteral(node.GetAlt_in_atom_expr11().GetRule_struct_literal1()); break; case TRule_in_atom_expr::ALT_NOT_SET: AltNotImplemented("in_atom_expr", node); @@ -2310,25 +2320,119 @@ TNodePtr TSqlExpression::SqlInExpr(const TRule_in_expr& node, const TTrailingQue TSqlExpression expr(Ctx_, Mode_); expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::InStatement); auto result = expr.UnaryExpr(node.GetRule_in_unary_subexpr1(), tail); + + if (TSourcePtr source = MoveOutIfSource(result)) { + if (IsSubqueryRef(source)) { // Prevent redundant ref to ref + return source; + } + + Ctx_.IncrementMonCounter("sql_features", "InSubquery"); + + const auto alias = Ctx_.MakeName("subquerynode"); + const auto ref = Ctx_.MakeName("subquery"); + + auto& blocks = Ctx_.GetCurrentBlocks(); + blocks.emplace_back(BuildSubquery( + std::move(source), + alias, + /* inSubquery = */ Mode_ == NSQLTranslation::ESqlMode::SUBQUERY, + /* ensureTupleSize = */ -1, + Ctx_.Scoped)); + blocks.back()->SetLabel(ref); + + return BuildSubqueryRef(blocks.back(), ref, /* tupleIndex = */ -1); + } + return result; } -TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { +bool TSqlExpression::IsTopLevelGroupBy() const { + return MaybeUnnamedSmartParenOnTop_ && + SmartParenthesisMode_ == ESmartParenthesis::GroupBy; +} + +TSourcePtr TSqlExpression::LangVersionedSubSelect(TSourcePtr source) { + if (!source) { + return nullptr; + } + + if (!IsSourceAllowed_ && !IsBackwardCompatibleFeatureAvailable(MakeLangVersion(2025, 04))) { + Ctx_.Error(source->GetPos()) << "Inline subquery is not available before 2025.04"; + return nullptr; + } + + return source; +} + +TNodePtr TSqlExpression::SelectSubExpr(const TRule_select_subexpr& node) { + TNodePtr result; + if 
(IsOnlySubExpr(node)) { + result = SelectOrExpr(node.GetRule_select_subexpr_intersect1() + .GetRule_select_or_expr1()); + } else { + result = LangVersionedSubSelect(TSqlSelect(Ctx_, Mode_).BuildSubSelect(node)); + } + + if (TSourcePtr source = MoveOutIfSource(result)) { + if (IsSourceAllowed_ || IsSubqueryRef(source)) { + return source; + } + + source->UseAsInner(); + result = BuildSourceNode(source->GetPos(), std::move(source)); + } + + return result; +} + +TNodePtr TSqlExpression::SelectOrExpr(const TRule_select_or_expr& node) { + switch (node.Alt_case()) { + case NSQLv1Generated::TRule_select_or_expr::kAltSelectOrExpr1: { + const auto& select_kind = node.GetAlt_select_or_expr1().GetRule_select_kind_partial1(); + TSourcePtr source = TSqlSelect(Ctx_, Mode_).BuildSubSelect(select_kind); + return LangVersionedSubSelect(std::move(source)); + } + case NSQLv1Generated::TRule_select_or_expr::kAltSelectOrExpr2: + return TupleOrExpr(node.GetAlt_select_or_expr2().GetRule_tuple_or_expr1()); + case NSQLv1Generated::TRule_select_or_expr::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TNodePtr TSqlExpression::TupleOrExpr(const TRule_tuple_or_expr& node) { TVector<TNodePtr> exprs; - Token(node.GetToken1()); const TPosition pos(Ctx_.Pos()); - const bool isTuple = node.HasBlock3(); + + const bool isTuple = node.HasBlock4(); + bool expectTuple = SmartParenthesisMode_ == ESmartParenthesis::InStatement; EExpr mode = EExpr::Regular; if (SmartParenthesisMode_ == ESmartParenthesis::SqlLambdaParams) { mode = EExpr::SqlLambdaParams; expectTuple = true; } - if (node.HasBlock2() && !NamedExprList(node.GetBlock2().GetRule_named_expr_list1(), exprs, mode)) { - return {}; - } - bool topLevelGroupBy = MaybeUnnamedSmartParenOnTop_ && SmartParenthesisMode_ == ESmartParenthesis::GroupBy; + { + const auto& head = node.GetRule_expr1(); + const auto* headName = node.HasBlock2() ? 
&node.GetBlock2().GetRule_an_id_or_type2() : nullptr; + + bool isDefinitelyTuple = isTuple || expectTuple || !node.GetBlock3().empty(); + if ((!headName && !isDefinitelyTuple) || IsSelect(head)) { + return BuildSourceOrNode(head); + } + + exprs.emplace_back(NamedExpr(head, headName, mode)); + if (!exprs.back()) { + return nullptr; + } + + for (const auto& item : node.GetBlock3()) { + exprs.emplace_back(NamedExpr(item.GetRule_named_expr2(), mode)); + if (!exprs.back()) { + return nullptr; + } + } + } bool hasAliases = false; bool hasUnnamed = false; @@ -2338,19 +2442,16 @@ TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { } else { hasUnnamed = true; } - if (hasAliases && hasUnnamed && !topLevelGroupBy) { + if (hasAliases && hasUnnamed && !IsTopLevelGroupBy()) { Ctx_.IncrementMonCounter("sql_errors", "AnonymousStructMembers"); Ctx_.Error(pos) << "Structure does not allow anonymous members"; return nullptr; } } - if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) { - return exprs.back(); - } - if (topLevelGroupBy) { + if (IsTopLevelGroupBy()) { if (isTuple) { Ctx_.IncrementMonCounter("sql_errors", "SimpleTupleInGroupBy"); - Token(node.GetBlock3().GetToken1()); + Token(node.GetBlock4().GetToken1()); Ctx_.Error() << "Unexpected trailing comma in grouping elements list"; return nullptr; } @@ -2361,4 +2462,24 @@ TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { return (hasUnnamed || expectTuple || exprs.size() == 0) ? 
BuildTuple(pos, exprs) : BuildStructure(pos, exprs); } +TNodePtr TSqlExpression::EmptyTuple() { + if (IsTopLevelGroupBy()) { + return BuildListOfNamedNodes(Ctx_.Pos(), TVector<TNodePtr>{}); + } + + return BuildTuple(Ctx_.Pos(), TVector<TNodePtr>{}); +} + +TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { + Token(node.GetToken1()); + switch (node.GetBlock2().GetAltCase()) { + case NSQLv1Generated::TRule_smart_parenthesis_TBlock2::kAlt1: + return SelectSubExpr(node.GetBlock2().GetAlt1().GetRule_select_subexpr1()); + case NSQLv1Generated::TRule_smart_parenthesis_TBlock2::kAlt2: + return EmptyTuple(); + case NSQLv1Generated::TRule_smart_parenthesis_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + } // namespace NSQLTranslationV1 |
