aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v0/sql.cpp
diff options
context:
space:
mode:
authorvvvv <vvvv@yandex-team.com>2024-11-07 12:29:36 +0300
committervvvv <vvvv@yandex-team.com>2024-11-07 13:49:47 +0300
commitd4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
treeb5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/sql/v0/sql.cpp
parent13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
downloadydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz
Moved other yql/essentials libs YQL-19206
init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/sql/v0/sql.cpp')
-rw-r--r--yql/essentials/sql/v0/sql.cpp5318
1 files changed, 5318 insertions, 0 deletions
diff --git a/yql/essentials/sql/v0/sql.cpp b/yql/essentials/sql/v0/sql.cpp
new file mode 100644
index 00000000000..caf61bdd0e1
--- /dev/null
+++ b/yql/essentials/sql/v0/sql.cpp
@@ -0,0 +1,5318 @@
+#include "sql.h"
+
+#include "context.h"
+#include "node.h"
+
+#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
+#include <yql/essentials/parser/proto_ast/gen/v0/SQLLexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v0/SQLParser.h>
+#include <yql/essentials/parser/proto_ast/gen/v0_proto_split/SQLParser.pb.main.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+
+#include <library/cpp/charset/ci_string.h>
+
+#include <google/protobuf/repeated_field.h>
+
+#include <util/generic/array_ref.h>
+#include <util/generic/set.h>
+#include <util/string/ascii.h>
+#include <util/string/cast.h>
+#include <util/string/reverse.h>
+#include <util/string/split.h>
+#include <util/string/hex.h>
+#include <util/string/join.h>
+
+#if defined(_tsan_enabled_)
+#include <util/system/mutex.h>
+#endif
+
+using namespace NYql;
+
+namespace NSQLTranslationV0 {
+
+using NALP::SQLLexerTokens;
+
+#if defined(_tsan_enabled_)
+ TMutex SanitizerSQLTranslationMutex;
+#endif
+
+using namespace NSQLGenerated;
+
+static TPosition GetPos(const TToken& token) {
+ return TPosition(token.GetColumn(), token.GetLine());
+}
+
+template <typename TToken>
+TIdentifier GetIdentifier(TTranslation& ctx, const TToken& node) {
+ auto token = node.GetToken1();
+ return TIdentifier(TPosition(token.GetColumn(), token.GetLine()), ctx.Identifier(token));
+}
+
+inline TIdentifier GetKeywordId(TTranslation& ctx, const TRule_keyword_restricted& node) {
+ switch (node.Alt_case()) {
+ case TRule_keyword_restricted::kAltKeywordRestricted1:
+ return GetIdentifier(ctx, node.GetAlt_keyword_restricted1().GetRule_keyword_compat1());
+ case TRule_keyword_restricted::kAltKeywordRestricted2:
+ return GetIdentifier(ctx, node.GetAlt_keyword_restricted2().GetRule_keyword_expr_uncompat1());
+ case TRule_keyword_restricted::kAltKeywordRestricted3:
+ return GetIdentifier(ctx, node.GetAlt_keyword_restricted3().GetRule_keyword_select_uncompat1());
+ case TRule_keyword_restricted::kAltKeywordRestricted4:
+ return GetIdentifier(ctx, node.GetAlt_keyword_restricted4().GetRule_keyword_in_uncompat1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+inline TIdentifier GetKeywordId(TTranslation& ctx, const TRule_keyword& node) {
+ switch (node.Alt_case()) {
+ case TRule_keyword::kAltKeyword1:
+ return GetKeywordId(ctx, node.GetAlt_keyword1().GetRule_keyword_restricted1());
+ case TRule_keyword::kAltKeyword2:
+ return GetIdentifier(ctx, node.GetAlt_keyword2().GetRule_keyword_alter_uncompat1());
+ case TRule_keyword::kAltKeyword3:
+ return GetIdentifier(ctx, node.GetAlt_keyword3().GetRule_keyword_table_uncompat1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+inline TString GetKeyword(TTranslation& ctx, const TRule_keyword& node) {
+ return GetKeywordId(ctx, node).Name;
+}
+
+inline TString GetKeyword(TTranslation& ctx, const TRule_keyword_restricted& node) {
+ return GetKeywordId(ctx, node).Name;
+}
+
+static TString Id(const TRule_id& node, TTranslation& ctx) {
+ // id: IDENTIFIER | keyword;
+ switch (node.Alt_case()) {
+ case TRule_id::kAltId1:
+ return ctx.Identifier(node.GetAlt_id1().GetToken1());
+ case TRule_id::kAltId2:
+ return GetKeyword(ctx, node.GetAlt_id2().GetRule_keyword1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static TString Id(const TRule_id_schema& node, TTranslation& ctx) {
+ switch (node.Alt_case()) {
+ case TRule_id_schema::kAltIdSchema1:
+ return ctx.Identifier(node.GetAlt_id_schema1().GetToken1());
+ case TRule_id_schema::kAltIdSchema2:
+ return GetKeyword(ctx, node.GetAlt_id_schema2().GetRule_keyword_restricted1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static std::pair<bool, TString> Id(const TRule_id_or_at& node, TTranslation& ctx) {
+ bool hasAt = node.HasBlock1();
+ return std::make_pair(hasAt, Id(node.GetRule_id2(), ctx) );
+}
+
+static TString Id(const TRule_id_table& node, TTranslation& ctx) {
+ // id_table: IDENTIFIER | keyword_restricted;
+ switch (node.Alt_case()) {
+ case TRule_id_table::kAltIdTable1:
+ return ctx.Identifier(node.GetAlt_id_table1().GetToken1());
+ case TRule_id_table::kAltIdTable2:
+ return GetKeyword(ctx, node.GetAlt_id_table2().GetRule_keyword_restricted1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static std::pair<bool, TString> Id(const TRule_id_table_or_at& node, TTranslation& ctx) {
+ // id_table_or_at: AT? id_table;
+ bool hasAt = node.HasBlock1();
+ return std::make_pair(hasAt, Id(node.GetRule_id_table2(), ctx));
+}
+
+static TString Id(const TRule_id_expr& node, TTranslation& ctx) {
+ switch (node.Alt_case()) {
+ case TRule_id_expr::kAltIdExpr1:
+ return ctx.Identifier(node.GetAlt_id_expr1().GetToken1());
+ case TRule_id_expr::kAltIdExpr2:
+ return ctx.Token(node.GetAlt_id_expr2().GetRule_keyword_compat1().GetToken1());
+ case TRule_id_expr::kAltIdExpr3:
+ return ctx.Token(node.GetAlt_id_expr3().GetRule_keyword_alter_uncompat1().GetToken1());
+ case TRule_id_expr::kAltIdExpr4:
+ return ctx.Token(node.GetAlt_id_expr4().GetRule_keyword_in_uncompat1().GetToken1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static TString Id(const TRule_in_id_expr& node, TTranslation& ctx) {
+ switch (node.Alt_case()) {
+ case TRule_in_id_expr::kAltInIdExpr1:
+ return ctx.Identifier(node.GetAlt_in_id_expr1().GetToken1());
+ case TRule_in_id_expr::kAltInIdExpr2:
+ return ctx.Token(node.GetAlt_in_id_expr2().GetRule_keyword_compat1().GetToken1());
+ case TRule_in_id_expr::kAltInIdExpr3:
+ return ctx.Token(node.GetAlt_in_id_expr3().GetRule_keyword_alter_uncompat1().GetToken1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static TIdentifier IdEx(const TRule_id& node, TTranslation& ctx) {
+ switch (node.Alt_case()) {
+ case TRule_id::kAltId1:
+ return GetIdentifier(ctx, node.GetAlt_id1());
+ case TRule_id::kAltId2:
+ return GetKeywordId(ctx, node.GetAlt_id2().GetRule_keyword1());
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static TString IdOrString(const TRule_id_or_string& node, TTranslation& ctx, bool rawString = false) {
+ switch (node.Alt_case()) {
+ case TRule_id_or_string::kAltIdOrString1: {
+ return ctx.Identifier(node.GetAlt_id_or_string1().GetToken1());
+ }
+ case TRule_id_or_string::kAltIdOrString2: {
+ auto raw = ctx.Token(node.GetAlt_id_or_string2().GetToken1());
+ return rawString ? raw : StringContent(ctx.Context(), raw);
+ }
+ case TRule_id_or_string::kAltIdOrString3:
+ return GetKeyword(ctx, node.GetAlt_id_or_string3().GetRule_keyword1());
+ default:
+ break;
+ }
+ Y_ABORT("You should change implementation according grammar changes");
+}
+
+static TString IdOrStringAsCluster(const TRule_id_or_string& node, TTranslation& ctx) {
+ TString cluster = IdOrString(node, ctx);
+ TString normalizedClusterName;
+ if (!ctx.Context().GetClusterProvider(cluster, normalizedClusterName)) {
+ ctx.Error() << "Unknown cluster: " << cluster;
+ return {};
+ }
+ return normalizedClusterName;
+}
+
+static TString OptIdPrefixAsStr(const TRule_opt_id_prefix& node, TTranslation& ctx, const TString& defaultStr = {}) {
+ if (!node.HasBlock1()) {
+ return defaultStr;
+ }
+ return IdOrString(node.GetBlock1().GetRule_id_or_string1(), ctx);
+}
+
+static TString OptIdPrefixAsClusterStr(const TRule_opt_id_prefix& node, TTranslation& ctx, const TString& defaultStr = {}) {
+ if (!node.HasBlock1()) {
+ return defaultStr;
+ }
+ return IdOrStringAsCluster(node.GetBlock1().GetRule_id_or_string1(), ctx);
+}
+
+static void PureColumnListStr(const TRule_pure_column_list& node, TTranslation& ctx, TVector<TString>& outList) {
+ outList.push_back(IdOrString(node.GetRule_id_or_string2(), ctx));
+ for (auto& block: node.GetBlock3()) {
+ outList.push_back(IdOrString(block.GetRule_id_or_string2(), ctx));
+ }
+}
+
+static TString NamedNodeImpl(const TRule_bind_parameter& node, TTranslation& ctx) {
+ // bind_parameter: DOLLAR id;
+ auto id = Id(node.GetRule_id2(), ctx);
+ return ctx.Token(node.GetToken1()) + id;
+}
+
+static TDeferredAtom PureColumnOrNamed(const TRule_pure_column_or_named& node, TTranslation& ctx) {
+ switch (node.Alt_case()) {
+ case TRule_pure_column_or_named::kAltPureColumnOrNamed1: {
+ auto named = NamedNodeImpl(node.GetAlt_pure_column_or_named1().GetRule_bind_parameter1(), ctx);
+ auto namedNode = ctx.GetNamedNode(named);
+ if (!namedNode) {
+ return {};
+ }
+
+ return TDeferredAtom(namedNode, ctx.Context());
+ }
+
+ case TRule_pure_column_or_named::kAltPureColumnOrNamed2:
+ return TDeferredAtom(ctx.Context().Pos(), IdOrString(node.GetAlt_pure_column_or_named2().GetRule_id_or_string1(), ctx));
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+}
+
+static bool PureColumnOrNamedListStr(const TRule_pure_column_or_named_list& node, TTranslation& ctx, TVector<TDeferredAtom>& outList) {
+ outList.push_back(PureColumnOrNamed(node.GetRule_pure_column_or_named2(), ctx));
+ if (outList.back().Empty()) {
+ return false;
+ }
+
+ for (auto& block : node.GetBlock3()) {
+ outList.push_back(PureColumnOrNamed(block.GetRule_pure_column_or_named2(), ctx));
+ if (outList.back().Empty()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+namespace {
+
+bool IsDistinctOptSet(const TRule_opt_set_quantifier& node) {
+ return node.HasBlock1() && node.GetBlock1().GetToken1().GetId() == SQLLexerTokens::TOKEN_DISTINCT;
+}
+
+std::pair<TString, bool> FlexType(const TRule_flex_type& node, TTranslation& ctx) {
+ switch (node.Alt_case()) {
+ case TRule_flex_type::kAltFlexType1:
+ return std::make_pair(StringContent(ctx.Context(), ctx.Token(node.GetAlt_flex_type1().GetToken1())), true);
+ case TRule_flex_type::kAltFlexType2: {
+ const auto& typeName = node.GetAlt_flex_type2().GetRule_type_name1();
+ const TString& paramOne = typeName.HasBlock2() ? typeName.GetBlock2().GetRule_integer2().GetToken1().GetValue() : TString();
+ const TString& paramTwo = !paramOne.empty() && typeName.GetBlock2().HasBlock3() ? typeName.GetBlock2().GetBlock3().GetRule_integer2().GetToken1().GetValue() : TString();
+ TString stringType = Id(typeName.GetRule_id1(), ctx);
+ if (!paramOne.empty() && !paramTwo.empty()) {
+ TStringStream strm;
+ strm << '(' << paramOne << ',' << paramTwo << ')';
+ stringType += strm.Str();
+ }
+ return std::make_pair(stringType, false);
+ }
+ default:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+}
+
+}
+
+static TColumnSchema ColumnSchemaImpl(const TRule_column_schema& node, TTranslation& ctx) {
+ const bool nullable = !node.HasBlock3() || !node.GetBlock3().HasBlock1();
+ const TString name(Id(node.GetRule_id_schema1(), ctx));
+ const TPosition pos(ctx.Context().Pos());
+ const auto& type = FlexType(node.GetRule_flex_type2(), ctx);
+ return TColumnSchema(pos, name, type.first, nullable, type.second);
+}
+
+static bool CreateTableEntry(const TRule_create_table_entry& node, TTranslation& ctx,
+ TVector<TColumnSchema>& columns, TVector<TIdentifier>& pkColumns,
+ TVector<TIdentifier>& partitionByColumns, TVector<std::pair<TIdentifier, bool>>& orderByColumns)
+{
+ switch (node.Alt_case()) {
+ case TRule_create_table_entry::kAltCreateTableEntry1:
+ columns.push_back(ColumnSchemaImpl(node.GetAlt_create_table_entry1().GetRule_column_schema1(), ctx));
+ break;
+
+ case TRule_create_table_entry::kAltCreateTableEntry2:
+ {
+ auto& constraint = node.GetAlt_create_table_entry2().GetRule_table_constraint1();
+ switch (constraint.Alt_case()) {
+ case TRule_table_constraint::kAltTableConstraint1: {
+ auto& pkConstraint = constraint.GetAlt_table_constraint1();
+ pkColumns.push_back(IdEx(pkConstraint.GetRule_id4(), ctx));
+ for (auto& block : pkConstraint.GetBlock5()) {
+ pkColumns.push_back(IdEx(block.GetRule_id2(), ctx));
+ }
+ break;
+ }
+ case TRule_table_constraint::kAltTableConstraint2: {
+ auto& pbConstraint = constraint.GetAlt_table_constraint2();
+ partitionByColumns.push_back(IdEx(pbConstraint.GetRule_id4(), ctx));
+ for (auto& block : pbConstraint.GetBlock5()) {
+ partitionByColumns.push_back(IdEx(block.GetRule_id2(), ctx));
+ }
+ break;
+ }
+ case TRule_table_constraint::kAltTableConstraint3: {
+ auto& obConstraint = constraint.GetAlt_table_constraint3();
+ auto extractDirection = [&ctx] (const TRule_column_order_by_specification& spec, bool& desc) {
+ desc = false;
+ if (!spec.HasBlock2()) {
+ return true;
+ }
+
+ auto& token = spec.GetBlock2().GetToken1();
+ switch (token.GetId()) {
+ case SQLLexerTokens::TOKEN_ASC:
+ return true;
+ case SQLLexerTokens::TOKEN_DESC:
+ desc = true;
+ return true;
+ default:
+ ctx.Error() << "Unsupported direction token: " << token.GetId();
+ return false;
+ }
+ };
+
+ bool desc = false;
+ auto& obSpec = obConstraint.GetRule_column_order_by_specification4();
+ if (!extractDirection(obSpec, desc)) {
+ return false;
+ }
+ orderByColumns.push_back(std::make_pair(IdEx(obSpec.GetRule_id1(), ctx), desc));
+
+ for (auto& block : obConstraint.GetBlock5()) {
+ auto& obSpec = block.GetRule_column_order_by_specification2();
+ if (!extractDirection(obSpec, desc)) {
+ return false;
+ }
+ orderByColumns.push_back(std::make_pair(IdEx(obSpec.GetRule_id1(), ctx), desc));
+ }
+ break;
+ }
+ default:
+ ctx.AltNotImplemented("table_constraint", constraint);
+ return false;
+ }
+ break;
+ }
+ default:
+ ctx.AltNotImplemented("create_table_entry", node);
+ return false;
+ }
+ return true;
+}
+
+static std::pair<TString, TString> TableKeyImpl(const std::pair<bool, TString>& nameWithAt, TString view, TTranslation& ctx) {
+ if (nameWithAt.first) {
+ view = "@";
+ ctx.Context().IncrementMonCounter("sql_features", "AnonymousTable");
+ }
+
+ return std::make_pair(nameWithAt.second, view);
+}
+
+static std::pair<TString, TString> TableKeyImpl(const TRule_table_key& node, TTranslation& ctx) {
+ auto nameWithAt(Id(node.GetRule_id_table_or_at1(), ctx));
+ TString view;
+ if (node.HasBlock2()) {
+ view = IdOrString(node.GetBlock2().GetRule_id_or_string2(), ctx);
+ ctx.Context().IncrementMonCounter("sql_features", "View");
+ }
+
+ return TableKeyImpl(nameWithAt, view, ctx);
+}
+
+static TVector<TString> TableHintsImpl(const TRule_table_hints& node, TTranslation& ctx) {
+ TVector<TString> hints;
+ auto& block = node.GetBlock2();
+ switch (block.Alt_case()) {
+ case TRule_table_hints::TBlock2::kAlt1: {
+ hints.push_back(IdOrString(block.GetAlt1().GetRule_id_or_string1(), ctx));
+ break;
+ }
+ case TRule_table_hints::TBlock2::kAlt2: {
+ PureColumnListStr(block.GetAlt2().GetRule_pure_column_list1(), ctx, hints);
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ return hints;
+}
+
+/// \return optional prefix
+static TString ColumnNameAsStr(TTranslation& ctx, const TRule_column_name& node, TString& id) {
+ id = IdOrString(node.GetRule_id_or_string2(), ctx);
+ return OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), ctx);
+}
+
+static TString ColumnNameAsSingleStr(TTranslation& ctx, const TRule_column_name& node) {
+ TString body;
+ const TString prefix = ColumnNameAsStr(ctx, node, body);
+ return prefix ? prefix + '.' + body : body;
+}
+
+static void FillTargetList(TTranslation& ctx, const TRule_set_target_list& node, TVector<TString>& targetList) {
+ targetList.push_back(ColumnNameAsSingleStr(ctx, node.GetRule_set_target2().GetRule_column_name1()));
+ for (auto& block: node.GetBlock3()) {
+ targetList.push_back(ColumnNameAsSingleStr(ctx, block.GetRule_set_target2().GetRule_column_name1()));
+ }
+}
+
+TVector<TString> GetContextHints(TContext& ctx) {
+ TVector<TString> hints;
+ if (ctx.PragmaInferSchema) {
+ hints.push_back("infer_scheme"); // TODO use yt.InferSchema instead
+ }
+ if (ctx.PragmaDirectRead) {
+ hints.push_back("direct_read");
+ }
+
+ return hints;
+}
+
+static TVector<TString> GetTableFuncHints(TStringBuf funcName) {
+ TCiString func(funcName);
+ if (func.StartsWith("range") || func.StartsWith("like") || func.StartsWith("regexp") || func.StartsWith("filter")
+ || func.StartsWith("each")) {
+
+ return TVector<TString>{"ignore_non_existing"};
+ }
+
+ return {};
+}
+
+
+static TTableRef SimpleTableRefImpl(const TRule_simple_table_ref& node, NSQLTranslation::ESqlMode mode, TTranslation& ctx) {
+ TMaybe<TTableRef> tr;
+ TString cluster;
+ switch (node.GetBlock1().Alt_case()) {
+ case TRule_simple_table_ref_TBlock1::AltCase::kAlt1: {
+ if (node.GetBlock1().GetAlt1().GetBlock1().HasRule_opt_id_prefix1()) {
+ cluster = OptIdPrefixAsClusterStr(node.GetBlock1().GetAlt1().GetBlock1().GetRule_opt_id_prefix1(), ctx, ctx.Context().CurrCluster);
+ if (!cluster && ctx.Context().CurrCluster) {
+ return TTableRef(ctx.Context().MakeName("table"), ctx.Context().CurrCluster, nullptr);
+ }
+ }
+
+ tr.ConstructInPlace(ctx.Context().MakeName("table"), cluster.empty() ? ctx.Context().CurrCluster : cluster, nullptr);
+ auto tableOrAt = Id(node.GetBlock1().GetAlt1().GetBlock1().GetRule_id_or_at2(), ctx);
+ auto tableAndView = TableKeyImpl(tableOrAt, "", ctx);
+ tr->Keys = BuildTableKey(ctx.Context().Pos(), tr->Cluster, TDeferredAtom(ctx.Context().Pos(), tableAndView.first), tableAndView.second);
+ break;
+ }
+ case TRule_simple_table_ref_TBlock1::AltCase::kAlt2: {
+ auto at = node.GetBlock1().GetAlt2().HasBlock1();
+ auto named = ctx.GetNamedNode(NamedNodeImpl(node.GetBlock1().GetAlt2().GetRule_bind_parameter2(), ctx));
+ if (!named) {
+ return TTableRef(ctx.Context().MakeName("table"), ctx.Context().CurrCluster, nullptr);
+ }
+
+ TDeferredAtom table;
+ if (!TryMakeClusterAndTableFromExpression(named, cluster, table, ctx.Context())) {
+ ctx.Error() << "Cannot infer cluster and table name";
+ return TTableRef(ctx.Context().MakeName("table"), ctx.Context().CurrCluster, nullptr);
+ }
+
+ tr.ConstructInPlace(ctx.Context().MakeName("table"), cluster.empty() ? ctx.Context().CurrCluster : cluster, nullptr);
+ tr->Keys = BuildTableKey(ctx.Context().Pos(), tr->Cluster, table, at ? "@" : "");
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+
+ if (mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && !cluster.empty()) {
+ ctx.Error() << "Cluster should not be used in limited view";
+ return TTableRef(ctx.Context().MakeName("table"), ctx.Context().CurrCluster, nullptr);
+ }
+
+ if (cluster.empty()) {
+ cluster = ctx.Context().CurrCluster;
+ }
+
+ TVector<TString> hints = GetContextHints(ctx.Context());
+ if (node.HasBlock2()) {
+ hints = TableHintsImpl(node.GetBlock2().GetRule_table_hints1(), ctx);
+ }
+
+ if (!hints.empty()) {
+ tr->Options = BuildInputOptions(ctx.Context().Pos(), hints);
+ }
+
+ return *tr;
+}
+
+static bool ValidateForCounters(const TString& input) {
+ for (auto c : input) {
+ if (!(IsAlnum(c) || c == '_')) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool IsColumnsOnly(const TVector<TSortSpecificationPtr>& container) {
+ for (const auto& elem: container) {
+ if (!elem->OrderExpr->GetColumnName()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+class TSqlTranslation: public TTranslation {
+protected:
+ TSqlTranslation(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TTranslation(ctx)
+ , Mode(mode)
+ {
+ /// \todo remove NSQLTranslation::ESqlMode params
+ YQL_ENSURE(ctx.Settings.Mode == mode);
+ }
+
+protected:
+ enum class EExpr {
+ Regular,
+ GroupBy,
+ SqlLambdaParams,
+ };
+ TNodePtr NamedExpr(const TRule_named_expr& node, EExpr exprMode = EExpr::Regular);
+ bool NamedExprList(const TRule_named_expr_list& node, TVector<TNodePtr>& exprs, EExpr exprMode = EExpr::Regular);
+ bool BindList(const TRule_bind_parameter_list& node, TVector<TString>& bindNames);
+ bool ModulePath(const TRule_module_path& node, TVector<TString>& path);
+ bool NamedBindList(const TRule_named_bind_parameter_list& node, TVector<TNodePtr>& bindNames);
+ TNodePtr NamedBindParam(const TRule_named_bind_parameter& node);
+ TNodePtr NamedNode(const TRule_named_nodes_stmt& rule, TVector<TString>& names);
+
+ bool ImportStatement(const TRule_import_stmt& stmt, TVector<TString>* namesPtr = nullptr);
+ TNodePtr DoStatement(const TRule_do_stmt& stmt, bool makeLambda, const TVector<TString>& args = {});
+ bool DefineActionOrSubqueryStatement(const TRule_define_action_or_subquery_stmt& stmt);
+ TNodePtr EvaluateIfStatement(const TRule_evaluate_if_stmt& stmt);
+ TNodePtr EvaluateForStatement(const TRule_evaluate_for_stmt& stmt);
+ TMaybe<TTableArg> TableArgImpl(const TRule_table_arg& node);
+ TTableRef TableRefImpl(const TRule_table_ref& node);
+ TMaybe<TSourcePtr> AsTableImpl(const TRule_table_ref& node);
+
+ NSQLTranslation::ESqlMode Mode;
+};
+
+class TSqlExpression: public TSqlTranslation {
+public:
+ enum class ESmartParenthesis {
+ Default,
+ GroupBy,
+ InStatement,
+ SqlLambdaParams,
+ };
+
+ TSqlExpression(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {
+ }
+
+ TNodePtr Build(const TRule_expr& node) {
+ // expr: or_subexpr (OR or_subexpr)*;
+ auto getNode = [](const TRule_expr::TBlock2& b) -> const TRule_or_subexpr& { return b.GetRule_or_subexpr2(); };
+ return BinOper("Or", node.GetRule_or_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ TNodePtr WrapExprShortcuts(const TNodePtr& node) {
+ return node;
+ //if (!ExprShortcuts || !node) {
+ //return node;
+ //}
+ //TIntrusivePtr<TAstListNodeImpl> prepareNode = new TAstListNodeImpl(Ctx.Pos());
+ //for (const auto& aliasPair: ExprShortcuts) {
+ //prepareNode->Add(prepareNode->Y("let", aliasPair.first, aliasPair.second));
+ //}
+ //prepareNode->Add(prepareNode->Y("return", node));
+ //return prepareNode->Y("block", prepareNode->Q(prepareNode));
+ }
+
+ void SetSmartParenthesisMode(ESmartParenthesis mode) {
+ SmartParenthesisMode = mode;
+ }
+
+ TNodePtr ExprShortcut(const TString& baseName, const TNodePtr& node) {
+ return BuildShortcutNode(node, baseName);
+ //auto alias = Ctx.MakeName(baseName);
+ //ExprShortcuts.emplace(alias, node);
+ //return BuildAtom(node->GetPos(), alias, TNodeFlags::Default);
+ }
+
+ TNodePtr LiteralExpr(const TRule_literal_value& node);
+private:
+ template<typename TWindowFunctionRule>
+ TNodePtr WindowFunctionRule(const TWindowFunctionRule& rule);
+ TNodePtr BindParameterRule(const TRule_bind_parameter& rule);
+ TNodePtr LambdaRule(const TRule_lambda& rule);
+ TNodePtr CastRule(const TRule_cast_expr& rule);
+ TNodePtr BitCastRule(const TRule_bitcast_expr& rule);
+ TNodePtr ExistsRule(const TRule_exists_expr& rule);
+ TNodePtr CaseRule(const TRule_case_expr& rule);
+
+ TNodePtr AtomExpr(const TRule_atom_expr& node);
+ TNodePtr InAtomExpr(const TRule_in_atom_expr& node);
+
+ template<typename TUnarySubExprRule>
+ TNodePtr UnaryExpr(const TUnarySubExprRule& node);
+
+ bool SqlLambdaParams(const TNodePtr& node, TVector<TString>& args);
+ bool SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq);
+
+ TNodePtr KeyExpr(const TRule_key_expr& node) {
+ TSqlExpression expr(Ctx, Mode);
+ return expr.WrapExprShortcuts(expr.Build(node.GetRule_expr2()));
+ }
+
+ TNodePtr SubExpr(const TRule_con_subexpr& node);
+ TNodePtr SubExpr(const TRule_xor_subexpr& node);
+
+ TNodePtr SubExpr(const TRule_mul_subexpr& node) {
+ // mul_subexpr: con_subexpr (DOUBLE_PIPE con_subexpr)*;
+ auto getNode = [](const TRule_mul_subexpr::TBlock2& b) -> const TRule_con_subexpr& { return b.GetRule_con_subexpr2(); };
+ return BinOper("Concat", node.GetRule_con_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ TNodePtr SubExpr(const TRule_add_subexpr& node) {
+ // add_subexpr: mul_subexpr ((ASTERISK | SLASH | PERCENT) mul_subexpr)*;
+ auto getNode = [](const TRule_add_subexpr::TBlock2& b) -> const TRule_mul_subexpr& { return b.GetRule_mul_subexpr2(); };
+ return BinOpList(node.GetRule_mul_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ TNodePtr SubExpr(const TRule_bit_subexpr& node) {
+ // bit_subexpr: add_subexpr ((PLUS | MINUS) add_subexpr)*;
+ auto getNode = [](const TRule_bit_subexpr::TBlock2& b) -> const TRule_add_subexpr& { return b.GetRule_add_subexpr2(); };
+ return BinOpList(node.GetRule_add_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ TNodePtr SubExpr(const TRule_neq_subexpr& node) {
+ // neq_subexpr: bit_subexpr ((SHIFT_LEFT | SHIFT_RIGHT | ROT_LEFT | ROT_RIGHT | AMPERSAND | PIPE | CARET) bit_subexpr)* (DOUBLE_QUESTION neq_subexpr)?;
+ auto getNode = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); };
+ auto result = BinOpList(node.GetRule_bit_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ if (!result) {
+ return nullptr;
+ }
+ if (node.HasBlock3()) {
+ auto block = node.GetBlock3();
+ TSqlExpression altExpr(Ctx, Mode);
+ auto altResult = altExpr.WrapExprShortcuts(SubExpr(block.GetRule_neq_subexpr2()));
+ if (!altResult) {
+ return nullptr;
+ }
+ const TVector<TNodePtr> args({result, altResult});
+ Token(block.GetToken1());
+ result = BuildBuiltinFunc(Ctx, Ctx.Pos(), "Coalesce", args);
+ }
+ return result;
+ }
+
+ TNodePtr SubExpr(const TRule_eq_subexpr& node) {
+ // eq_subexpr: neq_subexpr ((LESS | LESS_OR_EQ | GREATER | GREATER_OR_EQ) neq_subexpr)*;
+ auto getNode = [](const TRule_eq_subexpr::TBlock2& b) -> const TRule_neq_subexpr& { return b.GetRule_neq_subexpr2(); };
+ return BinOpList(node.GetRule_neq_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ TNodePtr SubExpr(const TRule_or_subexpr& node) {
+ // or_subexpr: and_subexpr (AND and_subexpr)*;
+ auto getNode = [](const TRule_or_subexpr::TBlock2& b) -> const TRule_and_subexpr& { return b.GetRule_and_subexpr2(); };
+ return BinOper("And", node.GetRule_and_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ TNodePtr SubExpr(const TRule_and_subexpr& node) {
+ // and_subexpr: xor_subexpr (XOR xor_subexpr)*;
+ auto getNode = [](const TRule_and_subexpr::TBlock2& b) -> const TRule_xor_subexpr& { return b.GetRule_xor_subexpr2(); };
+ return BinOper("Xor", node.GetRule_xor_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end());
+ }
+
+ template <typename TNode, typename TGetNode, typename TIter>
+ TNodePtr BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end);
+
+ TNodePtr BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const;
+
+ template <typename TNode, typename TGetNode, typename TIter>
+ TNodePtr BinOper(const TString& operName, const TNode& node, TGetNode getNode, TIter begin, TIter end);
+
+ TNodePtr SqlInExpr(const TRule_in_expr& node);
+ TNodePtr SmartParenthesis(const TRule_smart_parenthesis& node);
+
+ ESmartParenthesis SmartParenthesisMode = ESmartParenthesis::Default;
+
+ THashMap<TString, TNodePtr> ExprShortcuts;
+};
+
+class TSqlCallExpr: public TSqlTranslation {
+public:
+ TSqlCallExpr(TContext& ctx, NSQLTranslation::ESqlMode mode, TSqlExpression* usedExpr = nullptr)
+ : TSqlTranslation(ctx, mode)
+ , UsedExpr(usedExpr)
+ {
+ }
+
+ TSqlCallExpr(const TSqlCallExpr& call, const TVector<TNodePtr>& args)
+ : TSqlTranslation(call.Ctx, call.Mode)
+ , Pos(call.Pos)
+ , Func(call.Func)
+ , Module(call.Module)
+ , Node(call.Node)
+ , Args(args)
+ , AggMode(call.AggMode)
+ {
+ }
+
+ template<typename TCallExprRule>
+ bool Init(const TCallExprRule& node);
+ void IncCounters();
+
+ TNodePtr BuildUdf(bool withArgsType) {
+ auto result = Node ? Node : BuildCallable(Pos, Module, Func, withArgsType ? Args : TVector<TNodePtr>());
+ if (to_lower(Module) == "tensorflow" && Func == "RunBatch") {
+ Args.erase(Args.begin() + 2);
+ }
+ return result;
+ }
+
+ TNodePtr BuildCall() {
+ TVector<TNodePtr> args;
+ if (Node) {
+ Module = "YQL";
+ Func = NamedArgs.empty() ? "Apply" : "NamedApply";
+ args.push_back(Node);
+ }
+ bool mustUseNamed = !NamedArgs.empty();
+ if (mustUseNamed) {
+ if (Node) {
+ mustUseNamed = false;
+ }
+ args.emplace_back(BuildTuple(Pos, PositionalArgs));
+ args.emplace_back(BuildStructure(Pos, NamedArgs));
+ } else {
+ args.insert(args.end(), Args.begin(), Args.end());
+ }
+ TFuncPrepareNameNode funcPrepareNameNode;
+ if (UsedExpr) {
+ funcPrepareNameNode = [this](const TString& baseName, const TNodePtr& node) {
+ return UsedExpr->ExprShortcut(baseName, node);
+ };
+ }
+ auto result = BuildBuiltinFunc(Ctx, Pos, Func, args, Module, AggMode, &mustUseNamed, funcPrepareNameNode);
+ if (mustUseNamed) {
+ Error() << "Named args are used for call, but unsupported by function: " << Func;
+ return nullptr;
+ }
+ return result;
+ }
+
+ TPosition GetPos() const {
+ return Pos;
+ }
+
+ const TVector<TNodePtr>& GetArgs() const {
+ return Args;
+ }
+
+ bool EnsureNotDistinct(const TString& request) const {
+ if (AggMode == EAggregateMode::Distinct) {
+ Ctx.Error() << request << " does not allow DISTINCT arguments";
+ return false;
+ }
+ return true;
+ }
+
+ void SetOverWindow() {
+ YQL_ENSURE(AggMode == EAggregateMode::Normal);
+ AggMode = EAggregateMode::OverWindow;
+ }
+
+ void SetIgnoreNulls() {
+ Func += "_IgnoreNulls";
+ }
+
+private:
+ TPosition Pos;
+ TString Func;
+ TString Module;
+ TNodePtr Node;
+ TVector<TNodePtr> Args;
+ TVector<TNodePtr> PositionalArgs;
+ TVector<TNodePtr> NamedArgs;
+ EAggregateMode AggMode = EAggregateMode::Normal;
+ TSqlExpression* UsedExpr = nullptr;
+};
+
+TNodePtr TSqlTranslation::NamedExpr(const TRule_named_expr& node, EExpr exprMode) {
+ TSqlExpression expr(Ctx, Mode);
+ if (exprMode == EExpr::GroupBy) {
+ expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::GroupBy);
+ } else if (exprMode == EExpr::SqlLambdaParams) {
+ expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::SqlLambdaParams);
+ }
+ TNodePtr exprNode(expr.WrapExprShortcuts(expr.Build(node.GetRule_expr1())));
+ if (!exprNode) {
+ Ctx.IncrementMonCounter("sql_errors", "NamedExprInvalid");
+ return nullptr;
+ }
+ if (node.HasBlock2()) {
+ exprNode = SafeClone(exprNode);
+ exprNode->SetLabel(IdOrString(node.GetBlock2().GetRule_id_or_string2(), *this));
+ }
+ return exprNode;
+}
+
+bool TSqlTranslation::NamedExprList(const TRule_named_expr_list& node, TVector<TNodePtr>& exprs, EExpr exprMode) {
+ exprs.emplace_back(NamedExpr(node.GetRule_named_expr1(), exprMode));
+ if (!exprs.back()) {
+ return false;
+ }
+ for (auto& b: node.GetBlock2()) {
+ exprs.emplace_back(NamedExpr(b.GetRule_named_expr2(), exprMode));
+ if (!exprs.back()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool TSqlTranslation::BindList(const TRule_bind_parameter_list& node, TVector<TString>& bindNames) {
+ bindNames.emplace_back(NamedNodeImpl(node.GetRule_bind_parameter1(), *this));
+ for (auto& b: node.GetBlock2()) {
+ bindNames.emplace_back(NamedNodeImpl(b.GetRule_bind_parameter2(), *this));
+ }
+ return true;
+}
+
+bool TSqlTranslation::ModulePath(const TRule_module_path& node, TVector<TString>& path) {
+ if (node.HasBlock1()) {
+ path.emplace_back(TString());
+ }
+ path.emplace_back(Id(node.GetRule_id2(), *this));
+ for (auto& b: node.GetBlock3()) {
+ path.emplace_back(Id(b.GetRule_id2(), *this));
+ }
+ return true;
+}
+
+bool TSqlTranslation::NamedBindList(const TRule_named_bind_parameter_list& node, TVector<TNodePtr>& bindNames) {
+ bindNames.emplace_back(NamedBindParam(node.GetRule_named_bind_parameter1()));
+ for (auto& b: node.GetBlock2()) {
+ bindNames.emplace_back(NamedBindParam(b.GetRule_named_bind_parameter2()));
+ }
+ return std::find(bindNames.begin(), bindNames.end(), nullptr) == bindNames.end();
+}
+
+TNodePtr TSqlTranslation::NamedBindParam(const TRule_named_bind_parameter& node) {
+ auto bindName = NamedNodeImpl(node.GetRule_bind_parameter1(), *this);
+ auto result = BuildAtom(Ctx.Pos(), bindName, NYql::TNodeFlags::Default);
+ if (node.HasBlock2()) {
+ result->SetLabel(NamedNodeImpl(node.GetBlock2().GetRule_bind_parameter2(), *this));
+ }
+ return result;
+}
+
+TMaybe<TTableArg> TSqlTranslation::TableArgImpl(const TRule_table_arg& node) {
+ TTableArg ret;
+ ret.HasAt = node.HasBlock1();
+ TSqlExpression expr(Ctx, Mode);
+ ret.Expr = expr.Build(node.GetRule_expr2());
+ if (!ret.Expr) {
+ return Nothing();
+ }
+
+ if (node.HasBlock3()) {
+ ret.View = IdOrString(node.GetBlock3().GetRule_id_or_string2(), *this);
+ Context().IncrementMonCounter("sql_features", "View");
+ }
+
+ return ret;
+}
+
+TTableRef TSqlTranslation::TableRefImpl(const TRule_table_ref& node) {
+ if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && node.GetRule_opt_id_prefix1().HasBlock1()) {
+ Ctx.Error() << "Cluster should not be used in limited view";
+ return TTableRef(Ctx.MakeName("table"), Ctx.CurrCluster, nullptr);
+ }
+ auto cluster = OptIdPrefixAsClusterStr(node.GetRule_opt_id_prefix1(), *this, Context().CurrCluster);
+ if (!cluster) {
+ return TTableRef(Ctx.MakeName("table"), Ctx.CurrCluster, nullptr);
+ }
+
+ TTableRef tr(Context().MakeName("table"), cluster, nullptr);
+ TPosition pos(Context().Pos());
+ TVector<TString> tableHints;
+ auto& block = node.GetBlock2();
+ switch (block.Alt_case()) {
+ case TRule_table_ref::TBlock2::kAlt1: {
+ auto pair = TableKeyImpl(block.GetAlt1().GetRule_table_key1(), *this);
+ tr.Keys = BuildTableKey(pos, cluster, TDeferredAtom(pos, pair.first), pair.second);
+ break;
+ }
+ case TRule_table_ref::TBlock2::kAlt2: {
+ auto& alt = block.GetAlt2();
+ const TString func(Id(alt.GetRule_id_expr1(), *this));
+ auto arg = TableArgImpl(alt.GetRule_table_arg3());
+ if (!arg) {
+ return TTableRef(Ctx.MakeName("table"), Ctx.CurrCluster, nullptr);
+ }
+ TVector<TTableArg> args(1, *arg);
+ for (auto& b : alt.GetBlock4()) {
+ arg = TableArgImpl(b.GetRule_table_arg2());
+ if (!arg) {
+ return TTableRef(Ctx.MakeName("table"), Ctx.CurrCluster, nullptr);
+ }
+
+ args.push_back(*arg);
+ }
+ tableHints = GetTableFuncHints(func);
+ tr.Keys = BuildTableKeys(pos, cluster, func, args);
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ TVector<TString> hints = GetContextHints(Ctx);
+ if (node.HasBlock3()) {
+ hints = TableHintsImpl(node.GetBlock3().GetRule_table_hints1(), *this);
+ }
+
+ hints.insert(hints.end(), tableHints.begin(), tableHints.end());
+
+ if (!hints.empty()) {
+ tr.Options = BuildInputOptions(pos, hints);
+ }
+
+ return tr;
+}
+
+TMaybe<TSourcePtr> TSqlTranslation::AsTableImpl(const TRule_table_ref& node) {
+ const auto& block = node.GetBlock2();
+
+ if (block.Alt_case() == TRule_table_ref::TBlock2::kAlt2) {
+ auto& alt = block.GetAlt2();
+ TCiString func(Id(alt.GetRule_id_expr1(), *this));
+
+ if (func == "as_table") {
+ if (node.GetRule_opt_id_prefix1().HasBlock1()) {
+ Ctx.Error() << "Cluster shouldn't be specified for AS_TABLE source";
+ return TMaybe<TSourcePtr>(nullptr);
+ }
+
+ if (!alt.GetBlock4().empty()) {
+ Ctx.Error() << "Expected single argument for AS_TABLE source";
+ return TMaybe<TSourcePtr>(nullptr);
+ }
+
+ if (node.HasBlock3()) {
+ Ctx.Error() << "No hints expected for AS_TABLE source";
+ return TMaybe<TSourcePtr>(nullptr);
+ }
+
+ auto arg = TableArgImpl(alt.GetRule_table_arg3());
+ if (!arg) {
+ return TMaybe<TSourcePtr>(nullptr);
+ }
+
+ if (arg->Expr->GetSource()) {
+ Ctx.Error() << "AS_TABLE shouldn't be used for table sources";
+ return TMaybe<TSourcePtr>(nullptr);
+ }
+
+ return BuildNodeSource(Ctx.Pos(), arg->Expr);
+ }
+ }
+
+ return Nothing();
+}
+
+bool Expr(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr& node) {
+ TNodePtr exprNode = sqlExpr.Build(node);
+ if (!exprNode) {
+ return false;
+ }
+ exprNodes.push_back(exprNode);
+ return true;
+}
+
+bool ExprList(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr_list& node) {
+ if (!Expr(sqlExpr, exprNodes, node.GetRule_expr1())) {
+ return false;
+ }
+ for (auto b: node.GetBlock2()) {
+ sqlExpr.Token(b.GetToken1());
+ if (!Expr(sqlExpr, exprNodes, b.GetRule_expr2())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+template<typename TCallExprRule>
+bool TSqlCallExpr::Init(const TCallExprRule& node) {
+ // call_expr: ((id_or_string NAMESPACE id_or_string) | id_expr | bind_parameter) LPAREN (opt_set_quantifier named_expr_list COMMA? | ASTERISK)? RPAREN;
+ // OR
+ // in_call_expr: ((id_or_string NAMESPACE id_or_string) | in_id_expr | bind_parameter) LPAREN (opt_set_quantifier named_expr_list COMMA? | ASTERISK)? RPAREN;
+ const auto& block = node.GetBlock1();
+ switch (block.Alt_case()) {
+ case TCallExprRule::TBlock1::kAlt1: {
+ auto& subblock = block.GetAlt1().GetBlock1();
+ Module = IdOrString(subblock.GetRule_id_or_string1(), *this);
+ Func = IdOrString(subblock.GetRule_id_or_string3(), *this);
+ break;
+ }
+ case TCallExprRule::TBlock1::kAlt2: {
+ if constexpr (std::is_same_v<TCallExprRule, TRule_call_expr>) {
+ Func = Id(block.GetAlt2().GetRule_id_expr1(), *this);
+ } else {
+ Func = Id(block.GetAlt2().GetRule_in_id_expr1(), *this);
+ }
+ break;
+ }
+ case TCallExprRule::TBlock1::kAlt3:
+ Node = GetNamedNode(NamedNodeImpl(block.GetAlt3().GetRule_bind_parameter1(), *this));
+ if (!Node) {
+ return false;
+ }
+ break;
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ Pos = Ctx.Pos();
+ if (node.HasBlock3()) {
+ switch (node.GetBlock3().Alt_case()) {
+ case TCallExprRule::TBlock3::kAlt1: {
+ const auto& alt = node.GetBlock3().GetAlt1();
+ if (IsDistinctOptSet(alt.GetRule_opt_set_quantifier1())) {
+ YQL_ENSURE(AggMode == EAggregateMode::Normal);
+ AggMode = EAggregateMode::Distinct;
+ Ctx.IncrementMonCounter("sql_features", "DistinctInCallExpr");
+ }
+ if (!NamedExprList(alt.GetRule_named_expr_list2(), Args)) {
+ return false;
+ }
+ for (const auto& arg: Args) {
+ if (arg->GetLabel()) {
+ NamedArgs.push_back(arg);
+ } else {
+ PositionalArgs.push_back(arg);
+ if (!NamedArgs.empty()) {
+ Ctx.Error(arg->GetPos()) << "Unnamed arguments can not follow after named one";
+ return false;
+ }
+ }
+ }
+ break;
+ }
+ case TCallExprRule::TBlock3::kAlt2:
+ Args.push_back(BuildColumn(Pos, "*"));
+ break;
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ }
+ return true;
+}
+
+void TSqlCallExpr::IncCounters() {
+ if (Node) {
+ Ctx.IncrementMonCounter("sql_features", "NamedNodeUseApply");
+ } else if (!Module.empty()) {
+ if (ValidateForCounters(Module)) {
+ Ctx.IncrementMonCounter("udf_modules", Module);
+ Ctx.IncrementMonCounter("sql_features", "CallUdf");
+ if (ValidateForCounters(Func)) {
+ auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(Module);
+ if (scriptType == NKikimr::NMiniKQL::EScriptType::Unknown) {
+ Ctx.IncrementMonCounter("udf_functions", Module + "." + Func);
+ }
+ }
+ }
+ } else if (ValidateForCounters(Func)) {
+ Ctx.IncrementMonCounter("sql_builtins", Func);
+ Ctx.IncrementMonCounter("sql_features", "CallBuiltin");
+ }
+}
+
+class TSqlSelect: public TSqlTranslation {
+public:
+ TSqlSelect(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {
+ }
+
+ TSourcePtr Build(const TRule_select_stmt& node, TPosition& selectPos);
+
+private:
+ bool SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node);
+ bool ValidateSelectColumns(const TVector<TNodePtr>& terms);
+ bool ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node);
+ bool ColumnList(TVector<TNodePtr>& keys, const TRule_column_list& node);
+ bool NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node);
+ bool NamedColumnList(TVector<TNodePtr>& columnList, const TRule_named_column_list& node);
+ bool SortSpecification(const TRule_sort_specification& node, TVector<TSortSpecificationPtr>& sortSpecs);
+ bool SortSpecificationList(const TRule_sort_specification_list& node, TVector<TSortSpecificationPtr>& sortSpecs);
+ TSourcePtr SingleSource(const TRule_single_source& node);
+ TSourcePtr NamedSingleSource(const TRule_named_single_source& node);
+ TVector<TNodePtr> OrdinaryNamedColumnList(const TRule_ordinary_named_column_list& node);
+ TSourcePtr FlattenSource(const TRule_flatten_source& node);
+ TSourcePtr JoinSource(const TRule_join_source& node);
+ bool JoinOp(ISource* join, const TRule_join_source::TBlock2& block);
+ TNodePtr JoinExpr(ISource*, const TRule_join_constraint& node);
+ TSourcePtr ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos);
+ TSourcePtr ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos);
+ TSourcePtr SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos);
+ bool FrameStart(const TRule_window_frame_start& rule, TNodePtr& node, bool beginBound);
+ bool FrameBound(const TRule_window_frame_bound& rule, TNodePtr& node, bool beginBound);
+ bool FrameClause(const TRule_window_frame_clause& node, TMaybe<TFrameSpecification>& winSpecPtr);
+ TWindowSpecificationPtr WindowSpecification(const TRule_window_specification_details& rule);
+ bool WindowDefenition(const TRule_window_definition& node, TWinSpecs& winSpecs);
+ bool WindowClause(const TRule_window_clause& node, TWinSpecs& winSpecs);
+ bool OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy);
+ TSourcePtr SelectKind(const TRule_select_kind& node, TPosition& selectPos);
+ TSourcePtr SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos);
+ TSourcePtr SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos);
+};
+
+class TGroupByClause: public TSqlTranslation {
+ enum class EGroupByFeatures {
+ Begin,
+ Ordinary = Begin,
+ Expression,
+ Rollup,
+ Cube,
+ GroupingSet,
+ Empty,
+ End,
+ };
+ typedef TEnumBitSet<EGroupByFeatures, static_cast<int>(EGroupByFeatures::Begin), static_cast<int>(EGroupByFeatures::End)> TGroupingSetFeatures;
+
+ class TGroupByClauseCtx: public TSimpleRefCount<TGroupByClauseCtx> {
+ public:
+ typedef TIntrusivePtr<TGroupByClauseCtx> TPtr;
+
+ TGroupingSetFeatures GroupFeatures;
+ TMap<TString, TNodePtr> NodeAliases;
+ size_t UnnamedCount = 0;
+ };
+
+public:
+ TGroupByClause(TContext& ctx, NSQLTranslation::ESqlMode mode, TGroupByClauseCtx::TPtr groupSetContext = {})
+ : TSqlTranslation(ctx, mode)
+ , GroupSetContext(groupSetContext ? groupSetContext : TGroupByClauseCtx::TPtr(new TGroupByClauseCtx()))
+ {}
+
+ bool Build(const TRule_group_by_clause& node, bool stream);
+ bool ParseList(const TRule_grouping_element_list& groupingListNode);
+
+ void SetFeatures(const TString& field) const;
+ TVector<TNodePtr>& Content();
+ TMap<TString, TNodePtr>& Aliases();
+ THoppingWindowSpecPtr GetHoppingWindow();
+
+private:
+ TVector<TNodePtr> MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const;
+ void ResolveGroupByAndGrouping();
+ bool GroupingElement(const TRule_grouping_element& node);
+ void FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const;
+ bool OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node);
+ bool OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node);
+ bool HoppingWindow(const TRule_hopping_window_specification& node);
+
+ bool IsNodeColumnsOrNamedExpression(const TVector<TNodePtr>& content, const TString& construction) const;
+
+ TGroupingSetFeatures& Features();
+ const TGroupingSetFeatures& Features() const;
+ bool AddAlias(const TString& label, const TNodePtr& node);
+ TString GenerateGroupByExprName();
+ bool IsAutogenerated(const TString* name) const;
+
+ TVector<TNodePtr> GroupBySet;
+ TGroupByClauseCtx::TPtr GroupSetContext;
+ THoppingWindowSpecPtr HoppingWindowSpec; // stream queries
+ static const TString AutogenerateNamePrefix;
+};
+
+const TString TGroupByClause::AutogenerateNamePrefix = "group";
+
+bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix) {
+ const auto str = to_lower(strOrig);
+ const auto strLen = str.size();
+ ui64 base = 10;
+ if (strLen > 2 && str[0] == '0') {
+ const auto formatChar = str[1];
+ if (formatChar == 'x') {
+ base = 16;
+ } else if (formatChar == 'o') {
+ base = 8;
+ } else if (formatChar == 'b') {
+ base = 2;
+ }
+ }
+ if (strLen > 1) {
+ auto iter = str.cend() - 1;
+ if (*iter == 'l' || *iter == 's' || *iter == 't' || /* deprecated */ *iter == 'b') {
+ --iter;
+ }
+ if (*iter == 'u') {
+ --iter;
+ }
+ suffix = TString(++iter, str.cend());
+ }
+ value = 0;
+ const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size());
+ for (const char& cur: digString) {
+ const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)];
+ if (curDigit >= base) {
+ ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur <<
+ "' is out of base: " << base;
+ return false;
+ }
+ const auto curValue = value;
+ value *= base;
+ value += curDigit;
+ if (curValue > value) {
+ ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", number limit overflow";
+ return false;
+ }
+ }
+ return true;
+}
+
+TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node) {
+ const TString intergerString = ctx.Token(node.GetToken1());
+ ui64 value;
+ TString suffix;
+ if (!ParseNumbers(ctx, intergerString, value, suffix)) {
+ return {};
+ }
+
+ if (suffix == "ub" || suffix == "b") {
+ ctx.Warning(ctx.Pos(), TIssuesIds::YQL_DEPRECATED_TINY_INT_LITERAL_SUFFIX) << "Deprecated suffix 'b' - please use 't' suffix for 8-bit integers";
+ }
+
+ const bool noSpaceForInt32 = value >> 31;
+ const bool noSpaceForInt64 = value >> 63;
+ if (suffix == "") {
+ if (noSpaceForInt64) {
+ return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value));
+ } else if (noSpaceForInt32) {
+ return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value));
+ }
+ return new TLiteralNumberNode<i32>(ctx.Pos(), "Int32", ToString(value));
+ } else if (suffix == "u") {
+ return new TLiteralNumberNode<ui32>(ctx.Pos(), "Uint32", ToString(value));
+ } else if (suffix == "ul") {
+ return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value));
+ } else if (suffix == "ut" || suffix == "ub") {
+ return new TLiteralNumberNode<ui8>(ctx.Pos(), "Uint8", ToString(value));
+ } else if (suffix == "t" || suffix == "b") {
+ return new TLiteralNumberNode<i8>(ctx.Pos(), "Int8", ToString(value));
+ } else if (suffix == "l") {
+ return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value));
+ } else if (suffix == "us") {
+ return new TLiteralNumberNode<ui16>(ctx.Pos(), "Uint16", ToString(value));
+ } else if (suffix == "s") {
+ return new TLiteralNumberNode<i16>(ctx.Pos(), "Int16", ToString(value));
+ } else {
+ ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", invalid suffix: " << suffix;
+ return {};
+ }
+}
+
+TNodePtr LiteralReal(TContext& ctx, const TRule_real& node) {
+ const TString value(ctx.Token(node.GetToken1()));
+ YQL_ENSURE(!value.empty());
+ const auto lastValue = value[value.size() - 1];
+ if (lastValue == 'f' || lastValue == 'F') {
+ return new TLiteralNumberNode<float>(ctx.Pos(), "Float", value.substr(0, value.size()-1));
+ } else {
+ return new TLiteralNumberNode<double>(ctx.Pos(), "Double", value);
+ }
+}
+
+TNodePtr Literal(TContext& ctx, const TRule_unsigned_number& rule) {
+ switch (rule.Alt_case()) {
+ case TRule_unsigned_number::kAltUnsignedNumber1:
+ return LiteralNumber(ctx, rule.GetAlt_unsigned_number1().GetRule_integer1());
+ case TRule_unsigned_number::kAltUnsignedNumber2:
+ return LiteralReal(ctx, rule.GetAlt_unsigned_number2().GetRule_real1());
+ default:
+ Y_ABORT("Unsigned number: you should change implementation according grammar changes");
+ }
+}
+
+TNodePtr TSqlExpression::LiteralExpr(const TRule_literal_value& node) {
+ switch (node.Alt_case()) {
+ case TRule_literal_value::kAltLiteralValue1: {
+ return LiteralNumber(Ctx, node.GetAlt_literal_value1().GetRule_integer1());
+ }
+ case TRule_literal_value::kAltLiteralValue2: {
+ return LiteralReal(Ctx, node.GetAlt_literal_value2().GetRule_real1());
+ }
+ case TRule_literal_value::kAltLiteralValue3: {
+ const TString value(Token(node.GetAlt_literal_value3().GetToken1()));
+ return BuildLiteralSmartString(Ctx, value);
+ }
+ case TRule_literal_value::kAltLiteralValue5: {
+ Token(node.GetAlt_literal_value5().GetToken1());
+ return BuildLiteralNull(Ctx.Pos());
+ }
+ case TRule_literal_value::kAltLiteralValue9: {
+ const TString value(Token(node.GetAlt_literal_value9().GetRule_bool_value1().GetToken1()));
+ return BuildLiteralBool(Ctx.Pos(), value);
+ }
+ case TRule_literal_value::kAltLiteralValue10: {
+ return BuildEmptyAction(Ctx.Pos());
+ }
+ default:
+ AltNotImplemented("literal_value", node);
+ }
+ return nullptr;
+}
+
+template<typename TUnarySubExprType>
+TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node) {
+ //unary_subexpr: (id_expr | atom_expr ) key_expr* (DOT (bind_parameter | DIGITS | id_or_string) key_expr*)* (COLLATE id)?;
+ // OR
+ //in_unary_subexpr: (in_id_expr | in_atom_expr) key_expr* (DOT (bind_parameter | DIGITS | id_or_string) key_expr*)* (COLLATE id)?;
+ TVector<INode::TIdPart> ids;
+ auto& block = node.GetBlock1();
+ switch (block.Alt_case()) {
+ case TUnarySubExprType::TBlock1::kAlt1: {
+ auto& alt = block.GetAlt1();
+ TString name;
+ if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) {
+ name = Id(alt.GetRule_id_expr1(), *this);
+ } else {
+ name = Id(alt.GetRule_in_id_expr1(), *this);
+ }
+ ids.push_back(BuildColumn(Ctx.Pos()));
+ ids.push_back(name);
+ break;
+ }
+ case TUnarySubExprType::TBlock1::kAlt2: {
+ TNodePtr expr;
+ if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) {
+ expr = AtomExpr(block.GetAlt2().GetRule_atom_expr1());
+ } else {
+ expr = InAtomExpr(block.GetAlt2().GetRule_in_atom_expr1());
+ }
+
+ if (!expr) {
+ Ctx.IncrementMonCounter("sql_errors", "BadAtomExpr");
+ return nullptr;
+ }
+ ids.push_back(expr);
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ bool isLookup = false;
+ for (auto& b: node.GetBlock2()) {
+ auto expr = KeyExpr(b.GetRule_key_expr1());
+ if (!expr) {
+ Ctx.IncrementMonCounter("sql_errors", "BadKeyExpr");
+ return nullptr;
+ }
+ ids.push_back(expr);
+ isLookup = true;
+ }
+ TPosition pos(Ctx.Pos());
+ for (auto& dotBlock: node.GetBlock3()) {
+ auto bb = dotBlock.GetBlock2();
+ switch (bb.Alt_case()) {
+ case TUnarySubExprType::TBlock3::TBlock2::kAlt1: {
+ auto named = NamedNodeImpl(bb.GetAlt1().GetRule_bind_parameter1(), *this);
+ auto namedNode = GetNamedNode(named);
+ if (!namedNode) {
+ return nullptr;
+ }
+
+ ids.push_back(named);
+ ids.back().Expr = namedNode;
+ break;
+ }
+ case TUnarySubExprType::TBlock3::TBlock2::kAlt2: {
+ const TString str(Token(bb.GetAlt2().GetToken1()));
+ i32 pos = -1;
+ if (!TryFromString<i32>(str, pos)) {
+ Ctx.Error() << "Failed to parse i32 from string: " << str;
+ Ctx.IncrementMonCounter("sql_errors", "FailedToParsePos");
+ return nullptr;
+ }
+ ids.push_back(pos);
+ break;
+ }
+ case TUnarySubExprType::TBlock3::TBlock2::kAlt3: {
+ ids.push_back(IdOrString(bb.GetAlt3().GetRule_id_or_string1(), *this));
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ for (auto& b: dotBlock.GetBlock3()) {
+ auto expr = KeyExpr(b.GetRule_key_expr1());
+ if (!expr) {
+ Ctx.IncrementMonCounter("sql_errors", "BadKeyExpr");
+ return nullptr;
+ }
+ ids.push_back(expr);
+ isLookup = true;
+ }
+ }
+ if (node.HasBlock4()) {
+ Ctx.IncrementMonCounter("sql_errors", "CollateUnarySubexpr");
+ Error() << "unary_subexpr: COLLATE is not implemented yet";
+ }
+ Y_DEBUG_ABORT_UNLESS(!ids.empty());
+ Y_DEBUG_ABORT_UNLESS(ids.front().Expr);
+ return ids.size() > 1 ? BuildAccess(pos, ids, isLookup) : ids.back().Expr;
+}
+
+TNodePtr TSqlExpression::BindParameterRule(const TRule_bind_parameter& rule) {
+ const auto namedArg = NamedNodeImpl(rule, *this);
+ if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
+ Ctx.IncrementMonCounter("sql_features", "LambdaArgument");
+ return BuildAtom(Ctx.Pos(), namedArg);
+ } else {
+ Ctx.IncrementMonCounter("sql_features", "NamedNodeUseAtom");
+ return GetNamedNode(namedArg);
+ }
+}
+
+TNodePtr TSqlExpression::LambdaRule(const TRule_lambda& rule) {
+ const auto& alt = rule;
+ const bool isSqlLambda = alt.HasBlock2();
+ if (!isSqlLambda) {
+ return SmartParenthesis(alt.GetRule_smart_parenthesis1());
+ }
+ TSqlExpression expr(Ctx, Mode);
+ expr.SetSmartParenthesisMode(ESmartParenthesis::SqlLambdaParams);
+ auto parenthesis = expr.SmartParenthesis(alt.GetRule_smart_parenthesis1());
+ if (!parenthesis) {
+ return {};
+ }
+ TVector<TString> args;
+ if (!SqlLambdaParams(parenthesis, args)) {
+ return {};
+ }
+ auto bodyBlock = alt.GetBlock2();
+ Token(bodyBlock.GetToken1());
+ TPosition pos(Ctx.Pos());
+ TVector<TNodePtr> exprSeq;
+ for (const auto& arg: args) {
+ PushNamedNode(arg, BuildAtom(pos, arg, NYql::TNodeFlags::Default));
+ }
+ const bool ret = SqlLambdaExprBody(Ctx, bodyBlock.GetRule_lambda_body3(), exprSeq);
+ for (const auto& arg : args) {
+ PopNamedNode(arg);
+ }
+ if (!ret) {
+ return {};
+ }
+ return BuildSqlLambda(pos, std::move(args), std::move(exprSeq));
+}
+
+TNodePtr TSqlExpression::CastRule(const TRule_cast_expr& rule) {
+ Ctx.IncrementMonCounter("sql_features", "Cast");
+ const auto& alt = rule;
+ Token(alt.GetToken1());
+ TPosition pos(Ctx.Pos());
+ TSqlExpression expr(Ctx, Mode);
+ const auto& paramOne = alt.GetRule_type_name5().HasBlock2() ? alt.GetRule_type_name5().GetBlock2().GetRule_integer2().GetToken1().GetValue() : TString();
+ const auto& paramTwo = !paramOne.empty() && alt.GetRule_type_name5().GetBlock2().HasBlock3() ? alt.GetRule_type_name5().GetBlock2().GetBlock3().GetRule_integer2().GetToken1().GetValue() : TString();
+ return BuildCast(Ctx, pos, expr.Build(alt.GetRule_expr3()), Id(alt.GetRule_type_name5().GetRule_id1(), *this), paramOne, paramTwo);
+}
+
+TNodePtr TSqlExpression::BitCastRule(const TRule_bitcast_expr& rule) {
+ Ctx.IncrementMonCounter("sql_features", "BitCast");
+ const auto& alt = rule;
+ Token(alt.GetToken1());
+ TPosition pos(Ctx.Pos());
+ TSqlExpression expr(Ctx, Mode);
+ const auto& paramOne = alt.GetRule_type_name5().HasBlock2() ? alt.GetRule_type_name5().GetBlock2().GetRule_integer2().GetToken1().GetValue() : TString();
+ const auto& paramTwo = !paramOne.empty() && alt.GetRule_type_name5().GetBlock2().HasBlock3() ? alt.GetRule_type_name5().GetBlock2().GetBlock3().GetRule_integer2().GetToken1().GetValue() : TString();
+ return BuildBitCast(Ctx, pos, expr.Build(alt.GetRule_expr3()), Id(alt.GetRule_type_name5().GetRule_id1(), *this), paramOne, paramTwo);
+}
+
+TNodePtr TSqlExpression::ExistsRule(const TRule_exists_expr& rule) {
+ Ctx.IncrementMonCounter("sql_features", "Exists");
+ const auto& alt = rule;
+
+ Token(alt.GetToken2());
+ TSqlSelect select(Ctx, Mode);
+ TPosition pos;
+ auto source = select.Build(alt.GetRule_select_stmt3(), pos);
+ if (!source) {
+ Ctx.IncrementMonCounter("sql_errors", "BadSource");
+ return nullptr;
+ }
+ const bool checkExist = true;
+ return BuildBuiltinFunc(Ctx, Ctx.Pos(), "ListHasItems", {BuildSourceNode(pos, std::move(source), checkExist)});
+}
+
+TNodePtr TSqlExpression::CaseRule(const TRule_case_expr& rule) {
+ Ctx.IncrementMonCounter("sql_features", "Case");
+ const auto& alt = rule;
+ Token(alt.GetToken1());
+ TNodePtr elseExpr;
+ if (alt.HasBlock4()) {
+ Token(alt.GetBlock4().GetToken1());
+ TSqlExpression expr(Ctx, Mode);
+ elseExpr = expr.Build(alt.GetBlock4().GetRule_expr2());
+ } else {
+ Ctx.IncrementMonCounter("sql_errors", "ElseIsRequired");
+ Error() << "ELSE is required";
+ return nullptr;
+ }
+ TVector<TNodePtr> args;
+ for (int i = alt.Block3Size() - 1; i >= 0; --i) {
+ const auto& block = alt.GetBlock3(i).GetRule_when_expr1();
+ args.clear();
+ Token(block.GetToken1());
+ TSqlExpression condExpr(Ctx, Mode);
+ args.push_back(condExpr.Build(block.GetRule_expr2()));
+ if (alt.HasBlock2()) {
+ TSqlExpression expr(Ctx, Mode);
+ args.back() = BuildBinaryOp(Ctx.Pos(), "==", expr.Build(alt.GetBlock2().GetRule_expr1()), args.back());
+ }
+ Token(block.GetToken3());
+ TSqlExpression thenExpr(Ctx, Mode);
+ args.push_back(thenExpr.Build(block.GetRule_expr4()));
+ args.push_back(elseExpr);
+ if (i > 0) {
+ elseExpr = BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", args);
+ }
+ }
+ return BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", args);
+}
+
+template<typename TWindowFunctionType>
+TNodePtr TSqlExpression::WindowFunctionRule(const TWindowFunctionType& rule) {
+ // window_function: call_expr (null_treatment? OVER window_name_or_specification)?;
+ // OR
+ // in_window_function: in_call_expr (null_treatment? OVER window_name_or_specification)?;
+ const bool overWindow = rule.HasBlock2();
+ TSqlCallExpr call(Ctx, Mode, this);
+
+ bool initResult;
+ if constexpr (std::is_same_v<TWindowFunctionType, TRule_window_function>) {
+ initResult = call.Init(rule.GetRule_call_expr1());
+ } else {
+ initResult = call.Init(rule.GetRule_in_call_expr1());
+ }
+ if (!initResult) {
+ return {};
+ }
+
+ call.IncCounters();
+ if (!overWindow) {
+ return call.BuildCall();
+ }
+ auto funcSpec = rule.GetBlock2();
+ call.SetOverWindow();
+ auto winRule = funcSpec.GetRule_window_name_or_specification3();
+ if (winRule.Alt_case() != TRule_window_name_or_specification::kAltWindowNameOrSpecification1) {
+ Error() << "Inline window specification is not supported yet! You can define window function in WINDOW clause.";
+ return {};
+ }
+ if (funcSpec.HasBlock1() && funcSpec.GetBlock1().GetRule_null_treatment1().Alt_case() == TRule_null_treatment::kAltNullTreatment2) {
+ call.SetIgnoreNulls();
+ }
+ const TString windowName = Id(winRule.GetAlt_window_name_or_specification1().GetRule_window_name1().GetRule_id1(), *this);
+ Ctx.IncrementMonCounter("sql_features", "WindowFunctionOver");
+ return BuildCalcOverWindow(Ctx.Pos(), windowName, call.BuildCall());
+}
+
+TNodePtr TSqlExpression::AtomExpr(const TRule_atom_expr& node) {
+ // atom_expr:
+ // literal_value
+ // | bind_parameter
+ // | window_function
+ // | lambda
+ // | cast_expr
+ // | exists_expr
+ // | case_expr
+ // | id_or_string NAMESPACE id_or_string
+ // ;
+
+ switch (node.Alt_case()) {
+ case TRule_atom_expr::kAltAtomExpr1:
+ Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
+ return LiteralExpr(node.GetAlt_atom_expr1().GetRule_literal_value1());
+ case TRule_atom_expr::kAltAtomExpr2:
+ return BindParameterRule(node.GetAlt_atom_expr2().GetRule_bind_parameter1());
+ case TRule_atom_expr::kAltAtomExpr3:
+ return WindowFunctionRule(node.GetAlt_atom_expr3().GetRule_window_function1());
+ case TRule_atom_expr::kAltAtomExpr4:
+ return LambdaRule(node.GetAlt_atom_expr4().GetRule_lambda1());
+ case TRule_atom_expr::kAltAtomExpr5:
+ return CastRule(node.GetAlt_atom_expr5().GetRule_cast_expr1());
+ case TRule_atom_expr::kAltAtomExpr6:
+ return ExistsRule(node.GetAlt_atom_expr6().GetRule_exists_expr1());
+ case TRule_atom_expr::kAltAtomExpr7:
+ return CaseRule(node.GetAlt_atom_expr7().GetRule_case_expr1());
+ case TRule_atom_expr::kAltAtomExpr8: {
+ const auto& alt = node.GetAlt_atom_expr8();
+ const TString module(IdOrString(alt.GetRule_id_or_string1(), *this));
+ TPosition pos(Ctx.Pos());
+ bool rawString = true;
+ const TString name(IdOrString(alt.GetRule_id_or_string3(), *this, rawString));
+ return BuildCallable(pos, module, name, {});
+ }
+ case TRule_atom_expr::kAltAtomExpr9:
+ return BitCastRule(node.GetAlt_atom_expr9().GetRule_bitcast_expr1());
+ default:
+ AltNotImplemented("atom_expr", node);
+ }
+ return nullptr;
+}
+
+TNodePtr TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node) {
+ // in_atom_expr:
+ // literal_value
+ // | bind_parameter
+ // | in_window_function
+ // | smart_parenthesis
+ // | cast_expr
+ // | case_expr
+ // | LPAREN select_stmt RPAREN
+ // ;
+
+ switch (node.Alt_case()) {
+ case TRule_in_atom_expr::kAltInAtomExpr1:
+ Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
+ return LiteralExpr(node.GetAlt_in_atom_expr1().GetRule_literal_value1());
+ case TRule_in_atom_expr::kAltInAtomExpr2:
+ return BindParameterRule(node.GetAlt_in_atom_expr2().GetRule_bind_parameter1());
+ case TRule_in_atom_expr::kAltInAtomExpr3:
+ return WindowFunctionRule(node.GetAlt_in_atom_expr3().GetRule_in_window_function1());
+ case TRule_in_atom_expr::kAltInAtomExpr4:
+ return SmartParenthesis(node.GetAlt_in_atom_expr4().GetRule_smart_parenthesis1());
+ case TRule_in_atom_expr::kAltInAtomExpr5:
+ return CastRule(node.GetAlt_in_atom_expr5().GetRule_cast_expr1());
+ case TRule_in_atom_expr::kAltInAtomExpr6:
+ return CaseRule(node.GetAlt_in_atom_expr6().GetRule_case_expr1());
+ case TRule_in_atom_expr::kAltInAtomExpr7: {
+ Token(node.GetAlt_in_atom_expr7().GetToken1());
+ TSqlSelect select(Ctx, Mode);
+ TPosition pos;
+ auto source = select.Build(node.GetAlt_in_atom_expr7().GetRule_select_stmt2(), pos);
+ if (!source) {
+ Ctx.IncrementMonCounter("sql_errors", "BadSource");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_features", "InSubquery");
+ return BuildSelectResult(pos, std::move(source), false, Mode == NSQLTranslation::ESqlMode::SUBQUERY);
+ }
+ case TRule_in_atom_expr::kAltInAtomExpr8:
+ return BitCastRule(node.GetAlt_in_atom_expr8().GetRule_bitcast_expr1());
+ default:
+ AltNotImplemented("in_atom_expr", node);
+ }
+ return nullptr;
+}
+
+bool TSqlExpression::SqlLambdaParams(const TNodePtr& node, TVector<TString>& args) {
+ auto errMsg = TStringBuf("Invalid lambda arguments syntax. Lambda arguments should starts with '$' as named value.");
+ auto tupleNodePtr = dynamic_cast<TTupleNode*>(node.Get());
+ if (!tupleNodePtr) {
+ Ctx.Error(node->GetPos()) << errMsg;
+ return false;
+ }
+ THashSet<TString> dupArgsChecker;
+ for (const auto& argPtr: tupleNodePtr->Elements()) {
+ auto contentPtr = argPtr->GetAtomContent();
+ if (!contentPtr || !contentPtr->StartsWith("$")) {
+ Ctx.Error(argPtr->GetPos()) << errMsg;
+ return false;
+ }
+ if (!dupArgsChecker.insert(*contentPtr).second) {
+ Ctx.Error(argPtr->GetPos()) << "Duplicate lambda argument parametr: '" << *contentPtr << "'.";
+ return false;
+ }
+ args.push_back(*contentPtr);
+ }
+ return true;
+}
+
+bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq) {
+ TSqlExpression expr(ctx, ctx.Settings.Mode);
+ TVector<TString> localNames;
+ bool hasError = false;
+ for (auto& block: node.GetBlock1()) {
+ const auto& rule = block.GetRule_lambda_stmt1();
+ switch (rule.Alt_case()) {
+ case TRule_lambda_stmt::kAltLambdaStmt1: {
+ TVector<TString> names;
+ auto nodeExpr = NamedNode(rule.GetAlt_lambda_stmt1().GetRule_named_nodes_stmt1(), names);
+ if (!nodeExpr) {
+ hasError = true;
+ continue;
+ } else if (nodeExpr->GetSource()) {
+ ctx.Error() << "SELECT is not supported inside lambda body";
+ hasError = true;
+ continue;
+ }
+
+ if (names.size() > 1) {
+ auto ref = ctx.MakeName("tie");
+ exprSeq.push_back(nodeExpr->Y("EnsureTupleSize", nodeExpr, nodeExpr->Q(ToString(names.size()))));
+ exprSeq.back()->SetLabel(ref);
+ for (size_t i = 0; i < names.size(); ++i) {
+ TNodePtr nthExpr = nodeExpr->Y("Nth", ref, nodeExpr->Q(ToString(i)));
+ nthExpr->SetLabel(names[i]);
+ localNames.push_back(names[i]);
+ PushNamedNode(names[i], BuildAtom(nodeExpr->GetPos(), names[i], NYql::TNodeFlags::Default));
+ exprSeq.push_back(nthExpr);
+ }
+ } else {
+ nodeExpr->SetLabel(names.front());
+ localNames.push_back(names.front());
+ PushNamedNode(names.front(), BuildAtom(nodeExpr->GetPos(), names.front(), NYql::TNodeFlags::Default));
+ exprSeq.push_back(nodeExpr);
+ }
+ break;
+ }
+ case TRule_lambda_stmt::kAltLambdaStmt2: {
+ if (!ImportStatement(rule.GetAlt_lambda_stmt2().GetRule_import_stmt1(), &localNames)) {
+ hasError = true;
+ }
+ break;
+ }
+ default:
+ Y_ABORT("SampleClause: does not correspond to grammar changes");
+ }
+ }
+
+ TNodePtr nodeExpr;
+ if (!hasError) {
+ nodeExpr = expr.Build(node.GetRule_expr3());
+ }
+
+ for (const auto& name : localNames) {
+ PopNamedNode(name);
+ }
+
+ if (!nodeExpr) {
+ return false;
+ }
+ exprSeq.push_back(nodeExpr);
+ return true;
+}
+
+TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node) {
+ // con_subexpr: unary_subexpr | unary_op unary_subexpr;
+ switch (node.Alt_case()) {
+ case TRule_con_subexpr::kAltConSubexpr1:
+ return UnaryExpr(node.GetAlt_con_subexpr1().GetRule_unary_subexpr1());
+ case TRule_con_subexpr::kAltConSubexpr2: {
+ Ctx.IncrementMonCounter("sql_features", "UnaryOperation");
+ TString opName;
+ auto token = node.GetAlt_con_subexpr2().GetRule_unary_op1().GetToken1();
+ Token(token);
+ TPosition pos(Ctx.Pos());
+ switch (token.GetId()) {
+ case SQLLexerTokens::TOKEN_NOT: opName = "Not"; break;
+ case SQLLexerTokens::TOKEN_PLUS: opName = "Plus"; break;
+ case SQLLexerTokens::TOKEN_MINUS: opName = "Minus"; break;
+ case SQLLexerTokens::TOKEN_TILDA: opName = "BitNot"; break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnsupportedUnaryOperation");
+ Error() << "Unsupported unary operation: " << token.GetValue();
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_unary_operations", opName);
+ return BuildUnaryOp(pos, opName, UnaryExpr(node.GetAlt_con_subexpr2().GetRule_unary_subexpr2()));
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ return nullptr;
+}
+
+TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node) {
+ // xor_subexpr: eq_subexpr cond_expr?;
+ TNodePtr res(SubExpr(node.GetRule_eq_subexpr1()));
+ if (!res) {
+ return {};
+ }
+ TPosition pos(Ctx.Pos());
+ if (node.HasBlock2()) {
+ auto cond = node.GetBlock2().GetRule_cond_expr1();
+ switch (cond.Alt_case()) {
+ case TRule_cond_expr::kAltCondExpr1: {
+ const auto& matchOp = cond.GetAlt_cond_expr1();
+ const bool notMatch = matchOp.HasBlock1();
+ const TCiString& opName = Token(matchOp.GetRule_match_op2().GetToken1());
+ const auto& pattern = SubExpr(cond.GetAlt_cond_expr1().GetRule_eq_subexpr3());
+ if (!pattern) {
+ return {};
+ }
+ TNodePtr isMatch;
+ if (opName == "like" || opName == "ilike") {
+ const TString* escapeLiteral = nullptr;
+ TNodePtr escapeNode;
+ const auto& escaper = BuildUdf(Ctx, pos, "Re2", "PatternFromLike", {});
+ TVector<TNodePtr> escaperArgs({ escaper, pattern });
+
+ if (matchOp.HasBlock4()) {
+ const auto& escapeBlock = matchOp.GetBlock4();
+ TNodePtr escapeExpr = SubExpr(escapeBlock.GetRule_eq_subexpr2());
+ if (!escapeExpr) {
+ return {};
+ }
+ escapeLiteral = escapeExpr->GetLiteral("String");
+ escapeNode = escapeExpr;
+ if (escapeLiteral) {
+ Ctx.IncrementMonCounter("sql_features", "LikeEscape");
+ if (escapeLiteral->size() != 1) {
+ Ctx.IncrementMonCounter("sql_errors", "LikeMultiCharEscape");
+ Error() << "ESCAPE clause requires single character argument";
+ return nullptr;
+ }
+ if (escapeLiteral[0] == "%" || escapeLiteral[0] == "_" || escapeLiteral[0] == "\\") {
+ Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
+ Error() << "'%', '_' and '\\' are currently not supported in ESCAPE clause, ";
+ Error() << "please choose any other character";
+ return nullptr;
+ }
+ escaperArgs.push_back(BuildLiteralRawString(pos, *escapeLiteral));
+ } else {
+ Ctx.IncrementMonCounter("sql_errors", "LikeNotLiteralEscape");
+ Error() << "ESCAPE clause requires String literal argument";
+ return nullptr;
+ }
+ }
+
+ auto re2options = BuildUdf(Ctx, pos, "Re2", "Options", {});
+ TString csMode;
+ if (opName == "ilike") {
+ Ctx.IncrementMonCounter("sql_features", "CaseInsensitiveLike");
+ csMode = "false";
+ } else {
+ csMode = "true";
+ }
+ auto csModeLiteral = BuildLiteralBool(pos, csMode);
+ csModeLiteral->SetLabel("CaseSensitive");
+ auto csOption = BuildStructure(pos, { csModeLiteral });
+ auto optionsApply = new TCallNodeImpl(pos, "NamedApply", { re2options, BuildTuple(pos, {}), csOption });
+
+ const TNodePtr escapedPattern = new TCallNodeImpl(pos, "Apply", { escaperArgs });
+ auto list = new TAstListNodeImpl(pos, { escapedPattern, optionsApply });
+ auto runConfig = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), list });
+
+ const auto& matcher = BuildUdf(Ctx, pos, "Re2", "Match", { runConfig });
+ isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
+
+ const TString* literalPattern = pattern->GetLiteral("String");
+ if (literalPattern) {
+ TStringBuilder lowerBound;
+ bool inEscape = false;
+ bool hasPattern = false;
+ for (const char c : *literalPattern) {
+ if (escapeLiteral && c == escapeLiteral->at(0)) {
+ if (inEscape) {
+ lowerBound.append(c);
+ inEscape = false;
+ } else {
+ inEscape = true;
+ }
+ } else {
+ if (c == '%' || c == '_') {
+ if (inEscape) {
+ lowerBound.append(c);
+ inEscape = false;
+ } else {
+ hasPattern = true;
+ break;
+ }
+ } else {
+ if (inEscape) {
+ Ctx.IncrementMonCounter("sql_errors", "LikeEscapeNormalSymbol");
+ Error() << "Escape symbol should be used twice consecutively in LIKE pattern to be considered literal";
+ return nullptr;
+ } else {
+ lowerBound.append(c);
+ }
+ }
+ }
+ }
+ if (inEscape) {
+ Ctx.IncrementMonCounter("sql_errors", "LikeEscapeSymbolEnd");
+ Error() << "LIKE pattern should not end with escape symbol";
+ return nullptr;
+ }
+ if (opName != "ilike") {
+ if (!hasPattern) {
+ isMatch = BuildBinaryOp(pos, "==", res, BuildLiteralSmartString(Ctx,
+ TStringBuilder() << "@@" << lowerBound << "@@"));
+ } else if (!lowerBound.empty()) {
+ const auto& lowerBoundOp = BuildBinaryOp(pos, ">=", res, BuildLiteralSmartString(Ctx,
+ TStringBuilder() << "@@" << lowerBound << "@@"));
+ auto& isMatchCopy = isMatch;
+ TStringBuilder upperBound;
+ bool madeIncrement = false;
+
+ for (i64 i = lowerBound.size() - 1; i >=0 ; --i) {
+ if (!madeIncrement) {
+ upperBound.append(lowerBound[i] + 1);
+ madeIncrement = true;
+ } else {
+ upperBound.append(lowerBound[i]);
+ }
+ }
+ if (madeIncrement) {
+ ReverseInPlace(upperBound);
+ const auto& between = BuildBinaryOp(
+ pos,
+ "And",
+ lowerBoundOp,
+ BuildBinaryOp(pos, "<", res, BuildLiteralSmartString(Ctx,
+ TStringBuilder() << "@@" << upperBound << "@@"))
+ );
+ isMatch = BuildBinaryOp(pos, "And", between, isMatchCopy);
+ } else {
+ isMatch = BuildBinaryOp(pos, "And", lowerBoundOp, isMatchCopy);
+ }
+ }
+ }
+ }
+
+ Ctx.IncrementMonCounter("sql_features", notMatch ? "NotLike" : "Like");
+
+ } else if (opName == "regexp" || opName == "rlike" || opName == "match") {
+ if (matchOp.HasBlock4()) {
+ Ctx.IncrementMonCounter("sql_errors", "RegexpEscape");
+ TString opNameUpper(opName);
+ opNameUpper.to_upper();
+ Error() << opName << " and ESCAPE clauses should not be used together";
+ return nullptr;
+ }
+
+ const auto& matcher = BuildUdf(Ctx, pos, "Pcre", opName == "match" ? "BacktrackingMatch" : "BacktrackingGrep", { pattern });
+ isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
+ if (opName != "match") {
+ Ctx.IncrementMonCounter("sql_features", notMatch ? "NotRegexp" : "Regexp");
+ } else {
+ Ctx.IncrementMonCounter("sql_features", notMatch ? "NotMatch" : "Match");
+ }
+ } else {
+ Ctx.IncrementMonCounter("sql_errors", "UnknownMatchOp");
+ AltNotImplemented("match_op", cond);
+ return nullptr;
+ }
+ return notMatch ? BuildUnaryOp(pos, "Not", isMatch) : isMatch;
+ }
+ case TRule_cond_expr::kAltCondExpr2: {
+ auto altInExpr = cond.GetAlt_cond_expr2();
+ const bool notIn = altInExpr.HasBlock1();
+ auto hints = BuildTuple(pos, {});
+ if (altInExpr.HasBlock3()) {
+ Ctx.IncrementMonCounter("sql_features", "IsCompactHint");
+ auto sizeHint = BuildTuple(pos, { BuildQuotedAtom(pos, "isCompact", NYql::TNodeFlags::Default) });
+ hints = BuildTuple(pos, { sizeHint });
+ }
+ TSqlExpression inSubexpr(Ctx, Mode);
+ auto inRight = inSubexpr.SqlInExpr(altInExpr.GetRule_in_expr4());
+ auto isIn = BuildBuiltinFunc(Ctx, pos, "In", {res, inRight, hints});
+ Ctx.IncrementMonCounter("sql_features", notIn ? "NotIn" : "In");
+ return notIn ? BuildUnaryOp(pos, "Not", isIn) : isIn;
+ }
+ case TRule_cond_expr::kAltCondExpr3: {
+ auto altCase = cond.GetAlt_cond_expr3().GetBlock1().Alt_case();
+ const bool notNoll =
+ altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt2 ||
+ altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4
+ ;
+
+ if (altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4 &&
+ !cond.GetAlt_cond_expr3().GetBlock1().GetAlt4().HasBlock1())
+ {
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_MISSING_IS_BEFORE_NOT_NULL) << "Missing IS keyword before NOT NULL";
+ }
+
+ auto isNull = BuildIsNullOp(pos, res);
+ Ctx.IncrementMonCounter("sql_features", notNoll ? "NotNull" : "Null");
+ return notNoll ? BuildUnaryOp(pos, "Not", isNull) : isNull;
+ }
+ case TRule_cond_expr::kAltCondExpr4: {
+ auto alt = cond.GetAlt_cond_expr4();
+ if (alt.HasBlock1()) {
+ Ctx.IncrementMonCounter("sql_features", "NotBetween");
+ return BuildBinaryOp(
+ pos,
+ "Or",
+ BuildBinaryOp(pos, "<", res, SubExpr(alt.GetRule_eq_subexpr3())),
+ BuildBinaryOp(pos, ">", res, SubExpr(alt.GetRule_eq_subexpr5()))
+ );
+ } else {
+ Ctx.IncrementMonCounter("sql_features", "Between");
+ return BuildBinaryOp(
+ pos,
+ "And",
+ BuildBinaryOp(pos, ">=", res, SubExpr(alt.GetRule_eq_subexpr3())),
+ BuildBinaryOp(pos, "<=", res, SubExpr(alt.GetRule_eq_subexpr5()))
+ );
+ }
+ }
+ case TRule_cond_expr::kAltCondExpr5: {
+ auto alt = cond.GetAlt_cond_expr5();
+ auto getNode = [](const TRule_cond_expr::TAlt5::TBlock1& b) -> const TRule_eq_subexpr& { return b.GetRule_eq_subexpr2(); };
+ return BinOpList(node.GetRule_eq_subexpr1(), getNode, alt.GetBlock1().begin(), alt.GetBlock1().end());
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownConditionExpr");
+ AltNotImplemented("cond_expr", cond);
+ return nullptr;
+ }
+ }
+ return res;
+}
+
+TNodePtr TSqlExpression::BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const {
+ TPosition pos(Ctx.Pos());
+ const size_t opCount = end - begin;
+ Y_DEBUG_ABORT_UNLESS(opCount >= 2);
+ if (opCount == 2) {
+ return BuildBinaryOp(pos, opName, *begin, *(begin+1));
+ } if (opCount == 3) {
+ return BuildBinaryOp(pos, opName, BuildBinaryOp(pos, opName, *begin, *(begin+1)), *(begin+2));
+ } else {
+ auto mid = begin + opCount / 2;
+ return BuildBinaryOp(pos, opName, BinOperList(opName, begin, mid), BinOperList(opName, mid, end));
+ }
+}
+
+template <typename TNode, typename TGetNode, typename TIter>
+TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetNode getNode, TIter begin, TIter end) {
+ if (begin == end) {
+ return SubExpr(node);
+ }
+ Ctx.IncrementMonCounter("sql_binary_operations", opName);
+ const size_t listSize = end - begin;
+ TVector<TNodePtr> nodes;
+ nodes.reserve(1 + listSize);
+ nodes.push_back(SubExpr(node));
+ for (; begin != end; ++begin) {
+ nodes.push_back(SubExpr(getNode(*begin)));
+ }
+ return BinOperList(opName, nodes.begin(), nodes.end());
+}
+
+template <typename TNode, typename TGetNode, typename TIter>
+TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end) {
+ TNodePtr partialResult = SubExpr(node);
+ while (begin != end) {
+ Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
+ Token(begin->GetToken1());
+ TPosition pos(Ctx.Pos());
+ TString opName;
+ auto tokenId = begin->GetToken1().GetId();
+ switch (tokenId) {
+ case SQLLexerTokens::TOKEN_LESS:
+ Ctx.IncrementMonCounter("sql_binary_operations", "Less");
+ opName = "<";
+ break;
+ case SQLLexerTokens::TOKEN_LESS_OR_EQ:
+ opName = "<=";
+ Ctx.IncrementMonCounter("sql_binary_operations", "LessOrEq");
+ break;
+ case SQLLexerTokens::TOKEN_GREATER:
+ opName = ">";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Greater");
+ break;
+ case SQLLexerTokens::TOKEN_GREATER_OR_EQ:
+ opName = ">=";
+ Ctx.IncrementMonCounter("sql_binary_operations", "GreaterOrEq");
+ break;
+ case SQLLexerTokens::TOKEN_PLUS:
+ opName = "+";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Plus");
+ break;
+ case SQLLexerTokens::TOKEN_MINUS:
+ opName = "-";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Minus");
+ break;
+ case SQLLexerTokens::TOKEN_ASTERISK:
+ opName = "*";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Multiply");
+ break;
+ case SQLLexerTokens::TOKEN_SLASH:
+ opName = "/";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Divide");
+ if (!Ctx.PragmaClassicDivision) {
+ partialResult = BuildCast(Ctx, pos, partialResult, "Double");
+ }
+ break;
+ case SQLLexerTokens::TOKEN_PERCENT:
+ opName = "%";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Mod");
+ break;
+ case SQLLexerTokens::TOKEN_EQUALS:
+ Ctx.IncrementMonCounter("sql_binary_operations", "Equals");
+ [[fallthrough]];
+ case SQLLexerTokens::TOKEN_EQUALS2:
+ Ctx.IncrementMonCounter("sql_binary_operations", "Equals2");
+ opName = "==";
+ break;
+ case SQLLexerTokens::TOKEN_NOT_EQUALS:
+ Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals");
+ [[fallthrough]];
+ case SQLLexerTokens::TOKEN_NOT_EQUALS2:
+ Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals2");
+ opName = "!=";
+ break;
+ case SQLLexerTokens::TOKEN_AMPERSAND:
+ opName = "BitAnd";
+ Ctx.IncrementMonCounter("sql_binary_operations", "BitAnd");
+ break;
+ case SQLLexerTokens::TOKEN_PIPE:
+ opName = "BitOr";
+ Ctx.IncrementMonCounter("sql_binary_operations", "BitOr");
+ break;
+ case SQLLexerTokens::TOKEN_CARET:
+ opName = "BitXor";
+ Ctx.IncrementMonCounter("sql_binary_operations", "BitXor");
+ break;
+ case SQLLexerTokens::TOKEN_SHIFT_LEFT:
+ opName = "ShiftLeft";
+ Ctx.IncrementMonCounter("sql_binary_operations", "ShiftLeft");
+ break;
+ case SQLLexerTokens::TOKEN_SHIFT_RIGHT:
+ opName = "ShiftRight";
+ Ctx.IncrementMonCounter("sql_binary_operations", "ShiftRight");
+ break;
+ case SQLLexerTokens::TOKEN_ROT_LEFT:
+ opName = "RotLeft";
+ Ctx.IncrementMonCounter("sql_binary_operations", "RotLeft");
+ break;
+ case SQLLexerTokens::TOKEN_ROT_RIGHT:
+ opName = "RotRight";
+ Ctx.IncrementMonCounter("sql_binary_operations", "RotRight");
+ break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnsupportedBinaryOperation");
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return nullptr;
+ }
+
+ partialResult = BuildBinaryOp(pos, opName, partialResult, SubExpr(getNode(*begin)));
+ ++begin;
+ }
+
+ return partialResult;
+}
+
+TNodePtr TSqlExpression::SqlInExpr(const TRule_in_expr& node) {
+ TSqlExpression expr(Ctx, Mode);
+ expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::InStatement);
+ auto result = expr.WrapExprShortcuts(expr.UnaryExpr(node.GetRule_in_unary_subexpr1()));
+ return result;
+}
+
+TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) {
+ TVector<TNodePtr> exprs;
+ Token(node.GetToken1());
+ const TPosition pos(Ctx.Pos());
+ const bool isTuple = node.HasBlock3();
+ bool expectTuple = SmartParenthesisMode == ESmartParenthesis::InStatement;
+ EExpr mode = EExpr::Regular;
+ if (SmartParenthesisMode == ESmartParenthesis::GroupBy) {
+ mode = EExpr::GroupBy;
+ } else if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
+ mode = EExpr::SqlLambdaParams;
+ expectTuple = true;
+ }
+ if (node.HasBlock2() && !NamedExprList(node.GetBlock2().GetRule_named_expr_list1(), exprs, mode)) {
+ return {};
+ }
+
+ bool hasAliases = false;
+ bool hasUnnamed = false;
+ for (const auto& expr: exprs) {
+ if (expr->GetLabel()) {
+ hasAliases = true;
+ } else {
+ hasUnnamed = true;
+ }
+ if (hasAliases && hasUnnamed && SmartParenthesisMode != ESmartParenthesis::GroupBy) {
+ Ctx.IncrementMonCounter("sql_errors", "AnonymousStructMembers");
+ Ctx.Error(pos) << "Structure does not allow anonymous members";
+ return nullptr;
+ }
+ }
+ if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) {
+ return exprs.back();
+ }
+ if (SmartParenthesisMode == ESmartParenthesis::GroupBy) {
+ /// \todo support nested tuple\struct
+ if (isTuple) {
+ Ctx.IncrementMonCounter("sql_errors", "SimpleTupleInGroupBy");
+ Ctx.Error(pos) << "Unable to use tuple in group by clause";
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "ListOfNamedNode");
+ return BuildListOfNamedNodes(pos, std::move(exprs));
+ }
+ Ctx.IncrementMonCounter("sql_features", hasUnnamed ? "SimpleTuple" : "SimpleStruct");
+ return hasUnnamed || expectTuple ? BuildTuple(pos, exprs) : BuildStructure(pos, exprs);
+}
+
+TNodePtr TSqlTranslation::NamedNode(const TRule_named_nodes_stmt& rule, TVector<TString>& names) {
+ // named_nodes_stmt: bind_parameter_list EQUALS (expr | LPAREN select_stmt RPAREN);
+ BindList(rule.GetRule_bind_parameter_list1(), names);
+
+ TNodePtr nodeExpr = nullptr;
+ switch (rule.GetBlock3().Alt_case()) {
+ case TRule_named_nodes_stmt::TBlock3::kAlt1: {
+ TSqlExpression expr(Ctx, Mode);
+ return expr.Build(rule.GetBlock3().GetAlt1().GetRule_expr1());
+ }
+
+ case TRule_named_nodes_stmt::TBlock3::kAlt2: {
+ TSqlSelect expr(Ctx, Mode);
+ TPosition pos;
+ auto source = expr.Build(rule.GetBlock3().GetAlt2().GetRule_select_stmt2(), pos);
+ if (!source) {
+ return {};
+ }
+ return BuildSourceNode(pos, std::move(source));
+ }
+
+ default:
+ AltNotImplemented("named_node", rule.GetBlock3());
+ Ctx.IncrementMonCounter("sql_errors", "UnknownNamedNode");
+ return nullptr;
+ }
+}
+
+bool TSqlTranslation::ImportStatement(const TRule_import_stmt& stmt, TVector<TString>* namesPtr) {
+ TVector<TString> modulePath;
+ if (!ModulePath(stmt.GetRule_module_path2(), modulePath)) {
+ return false;
+ }
+ TVector<TNodePtr> bindNames;
+ if (!NamedBindList(stmt.GetRule_named_bind_parameter_list4(), bindNames)) {
+ return false;
+ }
+ const TString moduleAlias = Ctx.AddImport(std::move(modulePath));
+ if (!moduleAlias) {
+ return false;
+ }
+ for (const TNodePtr& name: bindNames) {
+ const TString* contentPtr = name->GetAtomContent();
+ YQL_ENSURE(contentPtr);
+ const TString& nameAlias = name->GetLabel();
+ const auto varName = nameAlias ? nameAlias : *contentPtr;
+ PushNamedNode(varName, name->Y("bind", moduleAlias, name->Q(*contentPtr)));
+ if (namesPtr) {
+ namesPtr->push_back(varName);
+ }
+ }
+ return true;
+}
+
+TNodePtr TSqlTranslation::DoStatement(const TRule_do_stmt& stmt, bool makeLambda, const TVector<TString>& args) {
+ TNodePtr action;
+ switch (stmt.GetBlock2().GetAltCase()) {
+ case TRule_do_stmt_TBlock2::kAlt1: {
+ auto bindName = NamedNodeImpl(stmt.GetBlock2().GetAlt1().GetRule_bind_parameter1(), *this);
+ action = GetNamedNode(bindName);
+ if (!action) {
+ return nullptr;
+ }
+ break;
+ }
+ case TRule_do_stmt_TBlock2::kAlt2:
+ action = BuildEmptyAction(Ctx.Pos());
+ break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownDoStmt");
+ AltNotImplemented("do_stmt", stmt.GetBlock2());
+ return nullptr;
+ }
+
+ TVector<TNodePtr> values;
+ values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "Apply", TNodeFlags::Default));
+ values.push_back(action);
+ values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "world", TNodeFlags::Default));
+
+ TSqlExpression sqlExpr(Ctx, Mode);
+ if (stmt.HasBlock4() && !ExprList(sqlExpr, values, stmt.GetBlock4().GetRule_expr_list1())) {
+ return nullptr;
+ }
+
+ TNodePtr apply = new TAstListNodeImpl(Ctx.Pos(), std::move(values));
+ if (!makeLambda) {
+ return BuildDoCall(Ctx.Pos(), apply);
+ }
+
+ TNodePtr params = new TAstListNodeImpl(Ctx.Pos());
+ params->Add("world");
+ for (const auto& arg : args) {
+ params->Add(new TAstAtomNodeImpl(Ctx.Pos(), arg, TNodeFlags::ArbitraryContent));
+ }
+
+ return BuildDoCall(Ctx.Pos(), BuildLambda(Ctx.Pos(), params, apply));
+}
+
+bool TSqlSelect::JoinOp(ISource* join, const TRule_join_source::TBlock2& block) {
+ const auto& node = block.GetRule_join_op1();
+ switch (node.Alt_case()) {
+ case TRule_join_op::kAltJoinOp1:
+ Ctx.IncrementMonCounter("sql_join_operations", "CartesianProduct");
+ Error() << "Cartesian product of tables is forbidden";
+ return false;
+ case TRule_join_op::kAltJoinOp2: {
+ auto alt = node.GetAlt_join_op2();
+ if (alt.HasBlock1()) {
+ Ctx.IncrementMonCounter("sql_join_operations", "Natural");
+ Error() << "Natural join is not implemented yet";
+ return false;
+ }
+ TString joinOp("Inner");
+ switch (alt.GetBlock2().Alt_case()) {
+ case TRule_join_op::TAlt2::TBlock2::kAlt1:
+ if (alt.GetBlock2().GetAlt1().HasBlock1()) {
+ auto block = alt.GetBlock2().GetAlt1().GetBlock1();
+ switch (block.Alt_case()) {
+ case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt1:
+ // left
+ joinOp = Token(block.GetAlt1().GetToken1());
+ if (block.GetAlt1().HasBlock2()) {
+ joinOp += " " + Token(block.GetAlt1().GetBlock2().GetToken1());
+ }
+ break;
+ case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt2:
+ // right
+ joinOp = Token(block.GetAlt2().GetToken1());
+ if (block.GetAlt2().HasBlock2()) {
+ joinOp += " " + Token(block.GetAlt2().GetBlock2().GetToken1());
+ }
+
+ break;
+ case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt3:
+ // exclusion
+ joinOp = Token(block.GetAlt3().GetToken1());
+ break;
+ case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt4:
+ // full
+ joinOp = Token(block.GetAlt4().GetToken1());
+ break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
+ AltNotImplemented("join_op", node);
+ return false;
+ }
+ }
+ break;
+ case TRule_join_op::TAlt2::TBlock2::kAlt2:
+ joinOp = Token(alt.GetBlock2().GetAlt2().GetToken1());
+ break;
+ case TRule_join_op::TAlt2::TBlock2::kAlt3:
+ joinOp = Token(alt.GetBlock2().GetAlt3().GetToken1());
+ break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
+ AltNotImplemented("join_op", node);
+ return false;
+ }
+
+ joinOp = NormalizeJoinOp(joinOp);
+ Ctx.IncrementMonCounter("sql_features", "Join");
+ Ctx.IncrementMonCounter("sql_join_operations", joinOp);
+
+ TNodePtr joinKeyExpr;
+ if (block.HasBlock3()) {
+ if (joinOp == "Cross") {
+ Error() << "Cross join should not have ON or USING expression";
+ Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
+ return false;
+ }
+
+ joinKeyExpr = JoinExpr(join, block.GetBlock3().GetRule_join_constraint1());
+ if (!joinKeyExpr) {
+ Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
+ return false;
+ }
+ }
+ else {
+ if (joinOp != "Cross") {
+ Error() << "Expected ON or USING expression";
+ Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
+ return false;
+ }
+ }
+
+ Y_DEBUG_ABORT_UNLESS(join->GetJoin());
+ join->GetJoin()->SetupJoin(joinOp, joinKeyExpr);
+ break;
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation2");
+ AltNotImplemented("join_op", node);
+ return false;
+ }
+ return true;
+}
+
+TNodePtr TSqlSelect::JoinExpr(ISource* join, const TRule_join_constraint& node) {
+ switch (node.Alt_case()) {
+ case TRule_join_constraint::kAltJoinConstraint1: {
+ auto& alt = node.GetAlt_join_constraint1();
+ Token(alt.GetToken1());
+ TSqlExpression expr(Ctx, Mode);
+ return expr.Build(alt.GetRule_expr2());
+ }
+ case TRule_join_constraint::kAltJoinConstraint2: {
+ auto& alt = node.GetAlt_join_constraint2();
+ Token(alt.GetToken1());
+ TPosition pos(Ctx.Pos());
+ TVector<TDeferredAtom> names;
+ if (!PureColumnOrNamedListStr(alt.GetRule_pure_column_or_named_list2(), *this, names)) {
+ return nullptr;
+ }
+
+ Y_DEBUG_ABORT_UNLESS(join->GetJoin());
+ return join->GetJoin()->BuildJoinKeys(Ctx, names);
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownJoinConstraint");
+ AltNotImplemented("join_constraint", node);
+ break;
+ }
+ return nullptr;
+}
+
+TVector<TNodePtr> TSqlSelect::OrdinaryNamedColumnList(const TRule_ordinary_named_column_list& node) {
+ TVector<TNodePtr> result;
+ switch (node.Alt_case()) {
+ case TRule_ordinary_named_column_list::kAltOrdinaryNamedColumnList1:
+ if (!NamedColumn(result, node.GetAlt_ordinary_named_column_list1().GetRule_named_column1())) {
+ return {};
+ }
+ break;
+ case TRule_ordinary_named_column_list::kAltOrdinaryNamedColumnList2:
+ if (!NamedColumnList(result, node.GetAlt_ordinary_named_column_list2().GetRule_named_column_list2())) {
+ return {};
+ }
+ break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownOrdinaryNamedColumn");
+ AltNotImplemented("ordinary_named_column_list", node);
+ }
+ return result;
+}
+
+TSourcePtr TSqlSelect::FlattenSource(const TRule_flatten_source& node) {
+ auto source = NamedSingleSource(node.GetRule_named_single_source1());
+ if (!source) {
+ return nullptr;
+ }
+ if (node.HasBlock2()) {
+ auto flatten = node.GetBlock2();
+ auto flatten2 = flatten.GetBlock2();
+ switch (flatten2.Alt_case()) {
+ case TRule_flatten_source::TBlock2::TBlock2::kAlt1: {
+ TString mode = "auto";
+ if (flatten2.GetAlt1().HasBlock1()) {
+ mode = to_lower(Token(flatten2.GetAlt1().GetBlock1().GetToken1()));
+ }
+
+ auto flattenColumns = OrdinaryNamedColumnList(flatten2.GetAlt1().GetRule_ordinary_named_column_list3());
+ if (flattenColumns.empty()) {
+ return nullptr;
+ }
+
+ Ctx.IncrementMonCounter("sql_features", "FlattenByColumns");
+ if (!source->AddExpressions(Ctx, flattenColumns, EExprSeat::FlattenBy)) {
+ return nullptr;
+ }
+
+ source->SetFlattenByMode(mode);
+ break;
+ }
+ case TRule_flatten_source::TBlock2::TBlock2::kAlt2: {
+ Ctx.IncrementMonCounter("sql_features", "FlattenColumns");
+ source->MarkFlattenColumns();
+ break;
+ }
+
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownOrdinaryNamedColumn");
+ AltNotImplemented("flatten_source", flatten2);
+ }
+ }
+ return source;
+}
+
+TSourcePtr TSqlSelect::JoinSource(const TRule_join_source& node) {
+ TSourcePtr source(FlattenSource(node.GetRule_flatten_source1()));
+ if (!source) {
+ return nullptr;
+ }
+ if (node.Block2Size()) {
+ TPosition pos(Ctx.Pos());
+ TVector<TSourcePtr> sources;
+ sources.emplace_back(std::move(source));
+ for (auto& block: node.GetBlock2()) {
+ sources.emplace_back(FlattenSource(block.GetRule_flatten_source2()));
+ if (!sources.back()) {
+ Ctx.IncrementMonCounter("sql_errors", "NoJoinWith");
+ return nullptr;
+ }
+ }
+ source = BuildEquiJoin(pos, std::move(sources));
+ for (auto& block: node.GetBlock2()) {
+ if (!JoinOp(source.Get(), block)) {
+ Ctx.IncrementMonCounter("sql_errors", "NoJoinOp");
+ return nullptr;
+ }
+ }
+ }
+ return source;
+}
+
+bool TSqlSelect::SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node) {
+ // result_column:
+ // opt_id_prefix ASTERISK
+ // | expr (AS id_or_string)?
+ // ;
+ switch (node.Alt_case()) {
+ case TRule_result_column::kAltResultColumn1: {
+ auto alt = node.GetAlt_result_column1();
+
+ Token(alt.GetToken2());
+ auto idAsteriskQualify = OptIdPrefixAsStr(alt.GetRule_opt_id_prefix1(), *this);
+ Ctx.IncrementMonCounter("sql_features", idAsteriskQualify ? "QualifyAsterisk" : "Asterisk");
+ terms.push_back(BuildColumn(Ctx.Pos(), "*", idAsteriskQualify));
+ break;
+ }
+ case TRule_result_column::kAltResultColumn2: {
+ auto alt = node.GetAlt_result_column2();
+ TSqlExpression expr(Ctx, Mode);
+ TNodePtr term(expr.Build(alt.GetRule_expr1()));
+ if (!term) {
+ Ctx.IncrementMonCounter("sql_errors", "NoTerm");
+ return false;
+ }
+ if (alt.HasBlock2()) {
+ term->SetLabel(IdOrString(alt.GetBlock2().GetRule_id_or_string2(), *this));
+ }
+ terms.push_back(term);
+ break;
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownResultColumn");
+ AltNotImplemented("result_column", node);
+ return false;
+ }
+ return true;
+}
+
+bool TSqlSelect::ValidateSelectColumns(const TVector<TNodePtr>& terms) {
+ TSet<TString> labels;
+ TSet<TString> asteriskSources;
+ for (const auto& term: terms) {
+ const auto& label = term->GetLabel();
+ if (!Ctx.PragmaAllowDotInAlias && label.find('.') != TString::npos) {
+ Ctx.Error(term->GetPos()) << "Unable to use '.' in column name. Invalid column name: " << label;
+ return false;
+ }
+ if (!label.empty()) {
+ if (!labels.insert(label).second) {
+ Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << label;
+ return false;
+ }
+ }
+ if (term->IsAsterisk()) {
+ const auto& source = *term->GetSourceName();
+ if (source.empty() && terms.ysize() > 1) {
+ Ctx.Error(term->GetPos()) << "Unable to use general '*' with other columns, either specify concrete table like '<table>.*', either specify concrete columns.";
+ return false;
+ } else if (!asteriskSources.insert(source).second) {
+ Ctx.Error(term->GetPos()) << "Unable to use twice same quialified asterisk. Invalid source: " << source;
+ return false;
+ }
+ } else if (label.empty()) {
+ const auto* column = term->GetColumnName();
+ if (column && !column->empty()) {
+ const auto& source = *term->GetSourceName();
+ const auto usedName = source.empty() ? *column : source + '.' + *column;
+ if (!labels.insert(usedName).second) {
+ Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << usedName;
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+TSourcePtr TSqlSelect::SingleSource(const TRule_single_source& node) {
+ switch (node.Alt_case()) {
+ case TRule_single_source::kAltSingleSource1: {
+ const auto& alt = node.GetAlt_single_source1();
+ const auto& table_ref = alt.GetRule_table_ref1();
+
+ if (auto maybeSource = AsTableImpl(table_ref)) {
+ auto source = *maybeSource;
+ if (!source) {
+ return nullptr;
+ }
+
+ if (!source->Init(Ctx, source.Get())) {
+ return nullptr;
+ }
+
+ return source;
+ } else {
+ TTableRef table(TableRefImpl(alt.GetRule_table_ref1()));
+ TPosition pos(Ctx.Pos());
+ Ctx.IncrementMonCounter("sql_select_clusters", table.Cluster);
+ if (!table.Check(Ctx)) {
+ return nullptr;
+ }
+ const auto serviceName = to_lower(table.ServiceName(Ctx));
+ const bool stream = serviceName == RtmrProviderName;
+
+ return BuildTableSource(pos, table, stream);
+ }
+ }
+ case TRule_single_source::kAltSingleSource2: {
+ const auto& alt = node.GetAlt_single_source2();
+ Token(alt.GetToken1());
+ TSqlSelect innerSelect(Ctx, Mode);
+ TPosition pos;
+ auto source = innerSelect.Build(alt.GetRule_select_stmt2(), pos);
+ if (!source) {
+ return nullptr;
+ }
+ return BuildInnerSource(pos, BuildSourceNode(pos, std::move(source)));
+ }
+ case TRule_single_source::kAltSingleSource3: {
+ const auto& alt = node.GetAlt_single_source3();
+ Ctx.IncrementMonCounter("sql_features", "NamedNodeUseSource");
+ auto named = NamedNodeImpl(alt.GetRule_bind_parameter2(), *this);
+ auto at = alt.HasBlock1();
+ if (at) {
+ if (alt.HasBlock3()) {
+ Ctx.Error() << "Subquery must not be used as anonymous table name";
+ return nullptr;
+ }
+
+ auto namedNode = GetNamedNode(named);
+ if (!namedNode) {
+ return nullptr;
+ }
+
+ auto source = TryMakeSourceFromExpression(Ctx, namedNode, "@");
+ if (!source) {
+ Ctx.Error() << "Cannot infer cluster and table name";
+ return nullptr;
+ }
+
+ return source;
+ }
+ auto node = GetNamedNode(named);
+ if (!node) {
+ Ctx.IncrementMonCounter("sql_errors", "NamedNodeSourceError");
+ return nullptr;
+ }
+ if (alt.HasBlock3()) {
+ TVector<TNodePtr> values;
+ values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "Apply", TNodeFlags::Default));
+ values.push_back(node);
+ values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "world", TNodeFlags::Default));
+
+ TSqlExpression sqlExpr(Ctx, Mode);
+ if (alt.GetBlock3().HasBlock2() && !ExprList(sqlExpr, values, alt.GetBlock3().GetBlock2().GetRule_expr_list1())) {
+ return nullptr;
+ }
+
+ TNodePtr apply = new TAstListNodeImpl(Ctx.Pos(), std::move(values));
+ return BuildNodeSource(Ctx.Pos(), apply);
+ }
+
+ return BuildInnerSource(Ctx.Pos(), node);
+ }
+ default:
+ AltNotImplemented("single_source", node);
+ Ctx.IncrementMonCounter("sql_errors", "UnknownSingleSource");
+ return nullptr;
+ }
+}
+
+TSourcePtr TSqlSelect::NamedSingleSource(const TRule_named_single_source& node) {
+ auto singleSource = SingleSource(node.GetRule_single_source1());
+ if (!singleSource) {
+ return nullptr;
+ }
+ if (node.HasBlock2()) {
+ const auto label = IdOrString(node.GetBlock2().GetRule_id_or_string2(), *this);
+ singleSource->SetLabel(label);
+ }
+ if (node.HasBlock3()) {
+ ESampleMode mode = ESampleMode::Auto;
+ TSqlExpression expr(Ctx, Mode);
+ TNodePtr samplingRateNode;
+ TNodePtr samplingSeedNode;
+ const auto& sampleBlock = node.GetBlock3();
+ TPosition pos;
+ switch (sampleBlock.Alt_case()) {
+ case TRule_named_single_source::TBlock3::kAlt1:
+ {
+ const auto& sampleExpr = sampleBlock.GetAlt1().GetRule_sample_clause1().GetRule_expr2();
+ samplingRateNode = expr.Build(sampleExpr);
+ if (!samplingRateNode) {
+ return nullptr;
+ }
+ pos = GetPos(sampleBlock.GetAlt1().GetRule_sample_clause1().GetToken1());
+ Ctx.IncrementMonCounter("sql_features", "SampleClause");
+ }
+ break;
+ case TRule_named_single_source::TBlock3::kAlt2:
+ {
+ const auto& tableSampleClause = sampleBlock.GetAlt2().GetRule_tablesample_clause1();
+ const auto& modeToken = tableSampleClause.GetRule_sampling_mode2().GetToken1();
+ const TCiString& token = Token(modeToken);
+ if (token == "system") {
+ mode = ESampleMode::System;
+ } else if (token == "bernoulli") {
+ mode = ESampleMode::Bernoulli;
+ } else {
+ Ctx.Error(GetPos(modeToken)) << "Unsupported sampling mode: " << token;
+ Ctx.IncrementMonCounter("sql_errors", "UnsupportedSamplingMode");
+ return nullptr;
+ }
+ const auto& tableSampleExpr = tableSampleClause.GetRule_expr4();
+ samplingRateNode = expr.Build(tableSampleExpr);
+ if (!samplingRateNode) {
+ return nullptr;
+ }
+ if (tableSampleClause.HasBlock6()) {
+ const auto& repeatableExpr = tableSampleClause.GetBlock6().GetRule_repeatable_clause1().GetRule_expr3();
+ samplingSeedNode = expr.Build(repeatableExpr);
+ if (!samplingSeedNode) {
+ return nullptr;
+ }
+ }
+ pos = GetPos(sampleBlock.GetAlt2().GetRule_tablesample_clause1().GetToken1());
+ Ctx.IncrementMonCounter("sql_features", "SampleClause");
+ }
+ break;
+ default:
+ Y_ABORT("SampleClause: does not corresond to grammar changes");
+ }
+ if (!singleSource->SetSamplingOptions(Ctx, pos, mode, samplingRateNode, samplingSeedNode)) {
+ Ctx.IncrementMonCounter("sql_errors", "IncorrectSampleClause");
+ return nullptr;
+ }
+ }
+ return singleSource;
+}
+
+bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node) {
+ const auto sourceName = OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), *this);
+ const auto columnName = IdOrString(node.GetRule_id_or_string2(), *this);
+ YQL_ENSURE(!columnName.empty());
+ keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
+ return true;
+}
+
+bool TSqlSelect::ColumnList(TVector<TNodePtr>& keys, const TRule_column_list& node) {
+ if (!ColumnName(keys, node.GetRule_column_name1())) {
+ return false;
+ }
+ for (auto b: node.GetBlock2()) {
+ Token(b.GetToken1());
+ if (!ColumnName(keys, b.GetRule_column_name2())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool TSqlSelect::NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node) {
+ if (!ColumnName(columnList, node.GetRule_column_name1())) {
+ return false;
+ }
+ if (node.HasBlock2()) {
+ const auto label = IdOrString(node.GetBlock2().GetRule_id_or_string2(), *this);
+ columnList.back()->SetLabel(label);
+ }
+ return true;
+}
+
+bool TSqlSelect::NamedColumnList(TVector<TNodePtr>& columnList, const TRule_named_column_list& node) {
+ if (!NamedColumn(columnList, node.GetRule_named_column1())) {
+ return false;
+ }
+ for (auto b: node.GetBlock2()) {
+ if (!NamedColumn(columnList, b.GetRule_named_column2())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool TSqlSelect::SortSpecification(const TRule_sort_specification& node, TVector<TSortSpecificationPtr>& sortSpecs) {
+ bool asc = true;
+ TSqlExpression expr(Ctx, Mode);
+ TNodePtr exprNode = expr.Build(node.GetRule_expr1());
+ if (!exprNode) {
+ return false;
+ }
+ if (node.HasBlock2()) {
+ const auto& token = node.GetBlock2().GetToken1();
+ Token(token);
+ switch (token.GetId()) {
+ case SQLLexerTokens::TOKEN_ASC:
+ Ctx.IncrementMonCounter("sql_features", "OrderByAsc");
+ break;
+ case SQLLexerTokens::TOKEN_DESC:
+ asc = false;
+ Ctx.IncrementMonCounter("sql_features", "OrderByDesc");
+ break;
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownOrderBy");
+ Error() << "Unsupported direction token: " << token.GetId();
+ return false;
+ }
+ } else {
+ Ctx.IncrementMonCounter("sql_features", "OrderByDefault");
+ }
+ auto sortSpecPtr = MakeIntrusive<TSortSpecification>();
+ sortSpecPtr->OrderExpr = exprNode;
+ sortSpecPtr->Ascending = asc;
+ sortSpecs.emplace_back(sortSpecPtr);
+ return true;
+}
+
+bool TSqlSelect::SortSpecificationList(const TRule_sort_specification_list& node, TVector<TSortSpecificationPtr>& sortSpecs) {
+ if (!SortSpecification(node.GetRule_sort_specification1(), sortSpecs)) {
+ return false;
+ }
+ for (auto sortSpec: node.GetBlock2()) {
+ Token(sortSpec.GetToken1());
+ if (!SortSpecification(sortSpec.GetRule_sort_specification2(), sortSpecs)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+TSourcePtr TSqlSelect::ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos) {
+ // PROCESS STREAM? named_single_source (COMMA named_single_source)* (USING call_expr (AS id_or_string)?
+ // (WHERE expr)? (HAVING expr)?)?
+
+ Token(node.GetToken1());
+ TPosition startPos(Ctx.Pos());
+
+ const bool stream = node.HasBlock2();
+ if (!selectPos) {
+ selectPos = startPos;
+ }
+
+ TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source3()));
+ if (!source) {
+ return nullptr;
+ }
+ if (node.GetBlock4().size()) {
+ TVector<TSourcePtr> sources(1, source);
+ for (auto& s: node.GetBlock4()) {
+ sources.push_back(NamedSingleSource(s.GetRule_named_single_source2()));
+ if (!sources.back()) {
+ return nullptr;
+ }
+ }
+ auto pos = source->GetPos();
+ source = BuildMuxSource(pos, std::move(sources));
+ }
+
+ bool hasUsing = node.HasBlock5();
+ if (!hasUsing) {
+ return BuildProcess(startPos, std::move(source), nullptr, {}, true, stream, settings);
+ }
+
+ const auto& block5 = node.GetBlock5();
+ if (block5.HasBlock4()) {
+ TSqlExpression expr(Ctx, Mode);
+ TNodePtr where = expr.Build(block5.GetBlock4().GetRule_expr2());
+ if (!where || !source->AddFilter(Ctx, where)) {
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "ProcessWhere");
+ } else {
+ Ctx.IncrementMonCounter("sql_features", stream ? "ProcessStream" : "Process");
+ }
+
+ if (block5.HasBlock5()) {
+ Ctx.Error() << "PROCESS does not allow HAVING yet! You may request it on yql@ maillist.";
+ return nullptr;
+ }
+
+ /// \todo other solution
+ PushNamedNode(TArgPlaceholderNode::ProcessRows, BuildArgPlaceholder(Ctx.Pos(), TArgPlaceholderNode::ProcessRows));
+ PushNamedNode(TArgPlaceholderNode::ProcessRow, BuildArgPlaceholder(Ctx.Pos(), TArgPlaceholderNode::ProcessRow));
+
+ bool listCall = false;
+ TSqlCallExpr call(Ctx, Mode);
+ bool initRet = call.Init(block5.GetRule_call_expr2());
+ if (initRet) {
+ call.IncCounters();
+ }
+
+ PopNamedNode(TArgPlaceholderNode::ProcessRows);
+ PopNamedNode(TArgPlaceholderNode::ProcessRow);
+ if (!initRet) {
+ return nullptr;
+ }
+
+ auto args = call.GetArgs();
+
+ /// SIN: special processing of binds
+ for (auto& arg: args) {
+ auto placeholder = dynamic_cast<TArgPlaceholderNode*>(arg.Get());
+ if (placeholder) {
+ auto name = placeholder->GetName();
+ if (name == TArgPlaceholderNode::ProcessRows) {
+ if (listCall) {
+ Ctx.Error(arg->GetPos()) << "Only single instance of " << name << " is allowed.";
+ return nullptr;
+ }
+ listCall = true;
+ arg = new TAstAtomNodeImpl(arg->GetPos(), "inputRowsList", 0);
+ } else if (name == TArgPlaceholderNode::ProcessRow) {
+ arg = BuildColumn(arg->GetPos(), "*");
+ }
+ }
+ }
+
+ TSqlCallExpr finalCall(call, args);
+
+ TNodePtr with(finalCall.BuildUdf(true));
+ if (!with || !finalCall.EnsureNotDistinct("PROCESS")) {
+ return {};
+
+ }
+ args = finalCall.GetArgs();
+
+ if (block5.HasBlock3()) {
+ with->SetLabel(IdOrString(block5.GetBlock3().GetRule_id_or_string2(), *this));
+ }
+
+ return BuildProcess(startPos, std::move(source), with, std::move(args), listCall, stream, settings);
+}
+
+TSourcePtr TSqlSelect::ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos) {
+ // REDUCE named_single_source (COMMA named_single_source)* (PRESORT sort_specification_list)?
+ // ON column_list USING ALL? call_expr (AS id_or_string)?
+ // (WHERE expr)? (HAVING expr)?
+ Token(node.GetToken1());
+ TPosition startPos(Ctx.Pos());
+ if (!selectPos) {
+ selectPos = startPos;
+ }
+
+ TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source2()));
+ if (!source) {
+ return {};
+ }
+ if (node.GetBlock3().size()) {
+ TVector<TSourcePtr> sources(1, source);
+ for (auto& s: node.GetBlock3()) {
+ sources.push_back(NamedSingleSource(s.GetRule_named_single_source2()));
+ if (!sources.back()) {
+ return nullptr;
+ }
+ }
+ auto pos = source->GetPos();
+ source = BuildMuxSource(pos, std::move(sources));
+ }
+
+ TVector<TSortSpecificationPtr> orderBy;
+ if (node.HasBlock4()) {
+ if (!SortSpecificationList(node.GetBlock4().GetRule_sort_specification_list2(), orderBy)) {
+ return {};
+ }
+ }
+
+ TVector<TNodePtr> keys;
+ if (!ColumnList(keys, node.GetRule_column_list6())) {
+ return nullptr;
+ }
+
+ if (node.HasBlock11()) {
+ TSqlExpression expr(Ctx, Mode);
+ TNodePtr where = expr.Build(node.GetBlock11().GetRule_expr2());
+ if (!where || !source->AddFilter(Ctx, where)) {
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "ReduceWhere");
+ } else {
+ Ctx.IncrementMonCounter("sql_features", "Reduce");
+ }
+
+ TNodePtr having;
+ if (node.HasBlock12()) {
+ TSqlExpression expr(Ctx, Mode);
+ having = expr.Build(node.GetBlock12().GetRule_expr2());
+ if (!having) {
+ return nullptr;
+ }
+ }
+
+ PushNamedNode(TArgPlaceholderNode::ProcessRow, BuildColumn(Ctx.Pos(), "*"));
+
+ TSqlCallExpr call(Ctx, Mode);
+ bool initRet = call.Init(node.GetRule_call_expr9());
+ if (initRet) {
+ call.IncCounters();
+ }
+
+ PopNamedNode(TArgPlaceholderNode::ProcessRow);
+ if (!initRet) {
+ return nullptr;
+ }
+
+ auto args = call.GetArgs();
+
+ TSqlCallExpr finalCall(call, args);
+
+ TNodePtr udf(finalCall.BuildUdf(false));
+ if (!udf || !finalCall.EnsureNotDistinct("REDUCE")) {
+ return {};
+ }
+
+ if (node.HasBlock10()) {
+ udf->SetLabel(IdOrString(node.GetBlock10().GetRule_id_or_string2(), *this));
+ }
+
+ const auto reduceMode = node.HasBlock8() ? ReduceMode::ByAll : ReduceMode::ByPartition;
+ return BuildReduce(startPos, reduceMode, std::move(source), std::move(orderBy), std::move(keys), std::move(args), udf, having, settings);
+}
+
+TSourcePtr TSqlSelect::SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos) {
+ // (FROM join_source)? SELECT STREAM? opt_set_quantifier result_column (COMMA result_column)* (WITHOUT column_list)? (FROM join_source)? (WHERE expr)?
+ // group_by_clause? (HAVING expr)? window_clause? order_by_clause?
+ if (node.HasBlock1()) {
+ Token(node.GetBlock1().GetToken1());
+ } else {
+ Token(node.GetToken2());
+ }
+
+ TPosition startPos(Ctx.Pos());
+ if (!selectPos) {
+ selectPos = Ctx.Pos();
+ }
+
+ const bool stream = node.HasBlock3();
+ const bool distinct = IsDistinctOptSet(node.GetRule_opt_set_quantifier4());
+ if (distinct) {
+ Ctx.IncrementMonCounter("sql_features", "DistinctInSelect");
+ }
+
+ TSourcePtr source(BuildFakeSource(selectPos));
+ if (node.HasBlock1() && node.HasBlock8()) {
+ Token(node.GetBlock8().GetToken1());
+ Ctx.IncrementMonCounter("sql_errors", "DoubleFrom");
+ Ctx.Error() << "Only one FROM clause is allowed";
+ return nullptr;
+ }
+ if (node.HasBlock1()) {
+ source = JoinSource(node.GetBlock1().GetRule_join_source2());
+ Ctx.IncrementMonCounter("sql_features", "FromInFront");
+ } else if (node.HasBlock8()) {
+ source = JoinSource(node.GetBlock8().GetRule_join_source2());
+ }
+ if (!source) {
+ return nullptr;
+ }
+
+ TVector<TNodePtr> without;
+ if (node.HasBlock7()) {
+ if (!ColumnList(without, node.GetBlock7().GetRule_column_list2())) {
+ return nullptr;
+ }
+ }
+ if (node.HasBlock9()) {
+ auto block = node.GetBlock9();
+ Token(block.GetToken1());
+ TPosition pos(Ctx.Pos());
+ TSqlExpression expr(Ctx, Mode);
+ TNodePtr where = expr.WrapExprShortcuts(expr.Build(block.GetRule_expr2()));
+ if (!where) {
+ Ctx.IncrementMonCounter("sql_errors", "WhereInvalid");
+ return nullptr;
+ }
+ if (!source->AddFilter(Ctx, where)) {
+ Ctx.IncrementMonCounter("sql_errors", "WhereNotSupportedBySource");
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "Where");
+ }
+
+ /// \todo merge gtoupByExpr and groupBy in one
+ TVector<TNodePtr> groupByExpr, groupBy;
+ THoppingWindowSpecPtr hoppingWindowSpec;
+ if (node.HasBlock10()) {
+ TGroupByClause clause(Ctx, Mode);
+ if (!clause.Build(node.GetBlock10().GetRule_group_by_clause1(), stream)) {
+ return nullptr;
+ }
+ for (const auto& exprAlias: clause.Aliases()) {
+ YQL_ENSURE(exprAlias.first == exprAlias.second->GetLabel());
+ groupByExpr.emplace_back(exprAlias.second);
+ }
+ groupBy = std::move(clause.Content());
+ clause.SetFeatures("sql_features");
+ hoppingWindowSpec = clause.GetHoppingWindow();
+ }
+
+ TNodePtr having;
+ if (node.HasBlock11()) {
+ TSqlExpression expr(Ctx, Mode);
+ having = expr.Build(node.GetBlock11().GetRule_expr2());
+ if (!having) {
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "Having");
+ }
+
+ TWinSpecs windowSpec;
+ if (node.HasBlock12()) {
+ if (stream) {
+ Ctx.Error() << "WINDOW is not allowed in streaming queries";
+ return nullptr;
+ }
+ if (!WindowClause(node.GetBlock12().GetRule_window_clause1(), windowSpec)) {
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "WindowClause");
+ }
+
+ TVector<TSortSpecificationPtr> orderBy;
+ if (node.HasBlock13()) {
+ if (stream) {
+ Ctx.Error() << "ORDER BY is not allowed in streaming queries";
+ return nullptr;
+ }
+ if (!OrderByClause(node.GetBlock13().GetRule_order_by_clause1(), orderBy)) {
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(orderBy) ? "OrderBy" : "OrderByExpr");
+ }
+ TVector<TNodePtr> terms;
+ if (!SelectTerm(terms, node.GetRule_result_column5())) {
+ return nullptr;
+ }
+ for (auto block: node.GetBlock6()) {
+ if (!SelectTerm(terms, block.GetRule_result_column2())) {
+ return nullptr;
+ }
+ }
+ if (!ValidateSelectColumns(terms)) {
+ return nullptr;
+ }
+ return BuildSelectCore(Ctx, startPos, std::move(source), groupByExpr, groupBy, orderBy, having,
+ std::move(windowSpec), hoppingWindowSpec, std::move(terms), distinct, std::move(without), stream, settings);
+}
+
+bool TSqlSelect::FrameStart(const TRule_window_frame_start& rule, TNodePtr& node, bool beginBound) {
+ switch (rule.Alt_case()) {
+ case TRule_window_frame_start::kAltWindowFrameStart1:
+ if (!beginBound) {
+ Ctx.Error() << "Unable to use UNBOUNDED PRECEDING after BETWEEN ... AND";
+ return false;
+ }
+ node = BuildLiteralVoid(Ctx.Pos());
+ return true;
+ case TRule_window_frame_start::kAltWindowFrameStart2:
+ if (beginBound) {
+ Ctx.Error() << "Unable to use FOLLOWING before AND in BETWEEN ... AND syntax";
+ return false;
+ }
+ {
+ auto precedingRule = rule.GetAlt_window_frame_start2().GetRule_window_frame_preceding1();
+ node = Literal(Ctx, precedingRule.GetRule_unsigned_number1());
+ return true;
+ }
+ case TRule_window_frame_start::kAltWindowFrameStart3:
+ return new TLiteralNumberNode<i32>(Ctx.Pos(), "Int32", ToString("0"));
+ return true;
+ default:
+ Y_ABORT("FrameClause: frame start not corresond to grammar changes");
+ }
+}
+
+bool TSqlSelect::FrameBound(const TRule_window_frame_bound& rule, TNodePtr& node, bool beginBound) {
+ switch (rule.Alt_case()) {
+ case TRule_window_frame_bound::kAltWindowFrameBound1:
+ return FrameStart(rule.GetAlt_window_frame_bound1().GetRule_window_frame_start1(), node, beginBound);
+ case TRule_window_frame_bound::kAltWindowFrameBound2:
+ if (beginBound) {
+ Ctx.Error() << "Unable to use UNBOUNDED FOLLOWING before AND";
+ return false;
+ }
+ node = BuildLiteralVoid(Ctx.Pos());
+ return true;
+ case TRule_window_frame_bound::kAltWindowFrameBound3:
+ if (beginBound) {
+ Ctx.Error() << "Unable to use FOLLOWING before AND";
+ return false;
+ }
+ {
+ auto followRule = rule.GetAlt_window_frame_bound3().GetRule_window_frame_following1();
+ node = Literal(Ctx, followRule.GetRule_unsigned_number1());
+ return true;
+ }
+ default:
+ Y_ABORT("FrameClause: frame bound not corresond to grammar changes");
+ }
+}
+
+bool TSqlSelect::FrameClause(const TRule_window_frame_clause& rule, TMaybe<TFrameSpecification>& frameSpecLink) {
+ TFrameSpecification frameSpec;
+ const TString frameUnitStr = to_lower(Token(rule.GetRule_window_frame_units1().GetToken1()));
+ if (frameUnitStr == "rows") {
+ frameSpec.FrameType = EFrameType::FrameByRows;
+ } else if (frameUnitStr == "range") {
+ frameSpec.FrameType = EFrameType::FrameByRange;
+ } else {
+ Ctx.Error() << "Unknown frame type in window specification: " << frameUnitStr;
+ return false;
+ }
+ auto frameExtent = rule.GetRule_window_frame_extent2();
+ switch (frameExtent.Alt_case()) {
+ case TRule_window_frame_extent::kAltWindowFrameExtent1:
+ if (!FrameStart(frameExtent.GetAlt_window_frame_extent1().GetRule_window_frame_start1(), frameSpec.FrameBegin, true)) {
+ return false;
+ }
+ break;
+ case TRule_window_frame_extent::kAltWindowFrameExtent2: {
+ auto between = frameExtent.GetAlt_window_frame_extent2().GetRule_window_frame_between1();
+ if (!FrameBound(between.GetRule_window_frame_bound2(), frameSpec.FrameBegin, true)) {
+ return false;
+ }
+ if (!FrameBound(between.GetRule_window_frame_bound4(), frameSpec.FrameEnd, false)) {
+ return false;
+ }
+ break;
+ }
+ default:
+ Y_ABORT("FrameClause: frame extent not corresond to grammar changes");
+ }
+ if (rule.HasBlock3()) {
+ switch (rule.GetBlock3().GetRule_window_frame_exclusion1().Alt_case()) {
+ case TRule_window_frame_exclusion::kAltWindowFrameExclusion1:
+ frameSpec.FrameExclusion = FrameExclCurRow;
+ break;
+ case TRule_window_frame_exclusion::kAltWindowFrameExclusion2:
+ frameSpec.FrameExclusion = FrameExclGroup;
+ break;
+ case TRule_window_frame_exclusion::kAltWindowFrameExclusion3:
+ frameSpec.FrameExclusion = FrameExclTies;
+ break;
+ case TRule_window_frame_exclusion::kAltWindowFrameExclusion4:
+ frameSpec.FrameExclusion = FrameExclNoOthers;
+ break;
+ default:
+ Y_ABORT("FrameClause: frame exclusion not corresond to grammar changes");
+ }
+ }
+ frameSpecLink = frameSpec;
+ return true;
+}
+
+TWindowSpecificationPtr TSqlSelect::WindowSpecification(const TRule_window_specification_details& rule) {
+ TWindowSpecificationPtr winSpecPtr = new TWindowSpecification;
+ if (rule.HasBlock1()) {
+ Ctx.Error() << "Existing window name is not supported in window specification yet!";
+ return {};
+ }
+ if (rule.HasBlock2()) {
+ if (!NamedExprList(rule.GetBlock2().GetRule_window_partition_clause1().GetRule_named_expr_list3(), winSpecPtr->Partitions)) {
+ return {};
+ }
+ }
+ if (rule.HasBlock3()) {
+ if (!OrderByClause(rule.GetBlock3().GetRule_window_order_clause1().GetRule_order_by_clause1(), winSpecPtr->OrderBy)) {
+ return {};
+ }
+ }
+ if (rule.HasBlock4()) {
+ if (!FrameClause(rule.GetBlock4().GetRule_window_frame_clause1(), winSpecPtr->Frame)) {
+ return {};
+ }
+ }
+ return winSpecPtr;
+}
+
+bool TSqlSelect::WindowDefenition(const TRule_window_definition& rule, TWinSpecs& winSpecs) {
+ const TString windowName = Id(rule.GetRule_new_window_name1().GetRule_window_name1().GetRule_id1(), *this);
+ if (winSpecs.contains(windowName)) {
+ Ctx.Error() << "Unable to declare window with same name: " << windowName;
+ return false;
+ }
+ auto windowSpec = WindowSpecification(rule.GetRule_window_specification3().GetRule_window_specification_details2());
+ if (!windowSpec) {
+ return false;
+ }
+ winSpecs.emplace(windowName, std::move(windowSpec));
+ return true;
+}
+
+bool TSqlSelect::WindowClause(const TRule_window_clause& rule, TWinSpecs& winSpecs) {
+ auto windowList = rule.GetRule_window_definition_list2();
+ if (!WindowDefenition(windowList.GetRule_window_definition1(), winSpecs)) {
+ return false;
+ }
+ for (auto& block: windowList.GetBlock2()) {
+ if (!WindowDefenition(block.GetRule_window_definition2(), winSpecs)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool TSqlSelect::OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy) {
+ return SortSpecificationList(node.GetRule_sort_specification_list3(), orderBy);
+}
+
+bool TGroupByClause::Build(const TRule_group_by_clause& node, bool stream) {
+ const bool distinct = IsDistinctOptSet(node.GetRule_opt_set_quantifier3());
+ if (distinct) {
+ Ctx.Error() << "DISTINCT is not supported in GROUP BY clause yet!";
+ Ctx.IncrementMonCounter("sql_errors", "DistinctInGroupByNotSupported");
+ return false;
+ }
+ if (!ParseList(node.GetRule_grouping_element_list4())) {
+ return false;
+ }
+ ResolveGroupByAndGrouping();
+ if (stream && !HoppingWindowSpec) {
+ Ctx.Error() << "Streaming group by query must have a hopping window specification.";
+ return false;
+ }
+ if (!stream && HoppingWindowSpec) {
+ Ctx.Error() << "Hopping window specification is not supported in a non-streaming query.";
+ return false;
+ }
+ return true;
+}
+
+bool TGroupByClause::ParseList(const TRule_grouping_element_list& groupingListNode) {
+ if (!GroupingElement(groupingListNode.GetRule_grouping_element1())) {
+ return false;
+ }
+ for (auto b: groupingListNode.GetBlock2()) {
+ if (!GroupingElement(b.GetRule_grouping_element2())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void TGroupByClause::SetFeatures(const TString& field) const {
+ Ctx.IncrementMonCounter(field, "GroupBy");
+ const auto& features = Features();
+ if (features.Test(EGroupByFeatures::Ordinary)) {
+ Ctx.IncrementMonCounter(field, "GroupByOrdinary");
+ }
+ if (features.Test(EGroupByFeatures::Expression)) {
+ Ctx.IncrementMonCounter(field, "GroupByExpression");
+ }
+ if (features.Test(EGroupByFeatures::Rollup)) {
+ Ctx.IncrementMonCounter(field, "GroupByRollup");
+ }
+ if (features.Test(EGroupByFeatures::Cube)) {
+ Ctx.IncrementMonCounter(field, "GroupByCube");
+ }
+ if (features.Test(EGroupByFeatures::GroupingSet)) {
+ Ctx.IncrementMonCounter(field, "GroupByGroupingSet");
+ }
+ if (features.Test(EGroupByFeatures::Empty)) {
+ Ctx.IncrementMonCounter(field, "GroupByEmpty");
+ }
+}
+
+TVector<TNodePtr>& TGroupByClause::Content() {
+ return GroupBySet;
+}
+
+TMap<TString, TNodePtr>& TGroupByClause::Aliases() {
+ return GroupSetContext->NodeAliases;
+}
+
+THoppingWindowSpecPtr TGroupByClause::GetHoppingWindow() {
+ return HoppingWindowSpec;
+}
+
+TVector<TNodePtr> TGroupByClause::MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const {
+ TVector<TNodePtr> content;
+ for (const auto& leftNode: lhs) {
+ auto leftPtr = leftNode->ContentListPtr();
+ YQL_ENSURE(leftPtr, "Unable to multiply grouping sets");
+ for (const auto& rightNode: rhs) {
+ TVector<TNodePtr> mulItem(leftPtr->begin(), leftPtr->end());
+ auto rightPtr = rightNode->ContentListPtr();
+ YQL_ENSURE(rightPtr, "Unable to multiply grouping sets");
+ mulItem.insert(mulItem.end(), rightPtr->begin(), rightPtr->end());
+ content.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(mulItem)));
+ }
+ }
+ return content;
+}
+
+void TGroupByClause::ResolveGroupByAndGrouping() {
+ auto listPos = std::find_if(GroupBySet.begin(), GroupBySet.end(), [](const TNodePtr& node) {
+ return node->ContentListPtr();
+ });
+ if (listPos == GroupBySet.end()) {
+ return;
+ }
+ auto curContent = *(*listPos)->ContentListPtr();
+ if (listPos != GroupBySet.begin()) {
+ TVector<TNodePtr> emulate(GroupBySet.begin(), listPos);
+ TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
+ curContent = MultiplyGroupingSets(emulateContent, curContent);
+ }
+ for (++listPos; listPos != GroupBySet.end(); ++listPos) {
+ auto newElem = (*listPos)->ContentListPtr();
+ if (newElem) {
+ curContent = MultiplyGroupingSets(curContent, *newElem);
+ } else {
+ TVector<TNodePtr> emulate(1, *listPos);
+ TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
+ curContent = MultiplyGroupingSets(curContent, emulateContent);
+ }
+ }
+ TVector<TNodePtr> result(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(curContent)));
+ std::swap(result, GroupBySet);
+}
+
+bool TGroupByClause::GroupingElement(const TRule_grouping_element& node) {
+ TSourcePtr res;
+ TVector<TNodePtr> emptyContent;
+ switch (node.Alt_case()) {
+ case TRule_grouping_element::kAltGroupingElement1:
+ if (!OrdinaryGroupingSet(node.GetAlt_grouping_element1().GetRule_ordinary_grouping_set1())) {
+ return false;
+ }
+ Features().Set(EGroupByFeatures::Ordinary);
+ break;
+ case TRule_grouping_element::kAltGroupingElement2: {
+ TGroupByClause subClause(Ctx, Mode, GroupSetContext);
+ if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element2().GetRule_rollup_list1().GetRule_ordinary_grouping_set_list3())) {
+ return false;
+ }
+ auto& content = subClause.Content();
+ if (!IsNodeColumnsOrNamedExpression(content, "ROLLUP")) {
+ return false;
+ }
+ TVector<TNodePtr> collection;
+ for (auto limit = content.end(), begin = content.begin(); limit != begin; --limit) {
+ TVector<TNodePtr> grouping(begin, limit);
+ collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
+ }
+ collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
+ GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
+ Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByRollup" << content.size());
+ Features().Set(EGroupByFeatures::Rollup);
+ break;
+ }
+ case TRule_grouping_element::kAltGroupingElement3: {
+ TGroupByClause subClause(Ctx, Mode, GroupSetContext);
+ if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element3().GetRule_cube_list1().GetRule_ordinary_grouping_set_list3())) {
+ return false;
+ }
+ auto& content = subClause.Content();
+ if (!IsNodeColumnsOrNamedExpression(content, "CUBE")) {
+ return false;
+ }
+ if (content.size() > Ctx.PragmaGroupByCubeLimit) {
+ Ctx.Error() << "GROUP BY CUBE is allowed only for " << Ctx.PragmaGroupByCubeLimit << " columns, but you use " << content.size();
+ return false;
+ }
+ TVector<TNodePtr> collection;
+ for (unsigned mask = (1 << content.size()) - 1; mask > 0; --mask) {
+ TVector<TNodePtr> grouping;
+ for (unsigned index = 0; index < content.size(); ++index) {
+ if (mask & (1 << index)) {
+ grouping.push_back(content[content.size() - index - 1]);
+ }
+ }
+ collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
+ }
+ collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
+ GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
+ Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByCube" << content.size());
+ Features().Set(EGroupByFeatures::Cube);
+ break;
+ }
+ case TRule_grouping_element::kAltGroupingElement4: {
+ auto listNode = node.GetAlt_grouping_element4().GetRule_grouping_sets_specification1().GetRule_grouping_element_list4();
+ TGroupByClause subClause(Ctx, Mode, GroupSetContext);
+ if (!subClause.ParseList(listNode)) {
+ return false;
+ }
+ auto& content = subClause.Content();
+ if (!IsNodeColumnsOrNamedExpression(content, "GROUPING SETS")) {
+ return false;
+ }
+ TVector<TNodePtr> collection;
+ bool hasEmpty = false;
+ for (auto& elem: content) {
+ auto elemContent = elem->ContentListPtr();
+ if (elemContent) {
+ if (!elemContent->empty() && elemContent->front()->ContentListPtr()) {
+ for (auto& sub: *elemContent) {
+ FeedCollection(sub, collection, hasEmpty);
+ }
+ } else {
+ FeedCollection(elem, collection, hasEmpty);
+ }
+ } else {
+ TVector<TNodePtr> elemList(1, std::move(elem));
+ collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(elemList)));
+ }
+ }
+ GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
+ Features().Set(EGroupByFeatures::GroupingSet);
+ break;
+ }
+ case TRule_grouping_element::kAltGroupingElement5: {
+ if (!HoppingWindow(node.GetAlt_grouping_element5().GetRule_hopping_window_specification1())) {
+ return false;
+ }
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ return true;
+}
+
+void TGroupByClause::FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const {
+ auto elemContentPtr = elem->ContentListPtr();
+ if (elemContentPtr && elemContentPtr->empty()) {
+ if (hasEmpty) {
+ return;
+ }
+ hasEmpty = true;
+ }
+ collection.push_back(elem);
+}
+
+bool TGroupByClause::OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node) {
+ auto namedExprNode = NamedExpr(node.GetRule_named_expr1(), EExpr::GroupBy);
+ if (!namedExprNode) {
+ return false;
+ }
+ auto nodeLabel = namedExprNode->GetLabel();
+ auto contentPtr = namedExprNode->ContentListPtr();
+ if (contentPtr) {
+ if (nodeLabel && (contentPtr->size() != 1 || contentPtr->front()->GetLabel())) {
+ Ctx.Error() << "Unable to use aliases for list of named expressions";
+ Ctx.IncrementMonCounter("sql_errors", "GroupByAliasForListOfExpressions");
+ return false;
+ }
+ for (auto& content: *contentPtr) {
+ auto label = content->GetLabel();
+ if (!label) {
+ if (content->GetColumnName()) {
+ namedExprNode->AssumeColumn();
+ continue;
+ }
+
+ content->SetLabel(label = GenerateGroupByExprName());
+ }
+ if (!AddAlias(label, content)) {
+ return false;
+ }
+ content = BuildColumn(content->GetPos(), label);
+ }
+ } else {
+ if (!nodeLabel && namedExprNode->GetColumnName()) {
+ namedExprNode->AssumeColumn();
+ }
+
+ if (!nodeLabel && !namedExprNode->GetColumnName()) {
+ namedExprNode->SetLabel(nodeLabel = GenerateGroupByExprName());
+ }
+ if (nodeLabel) {
+ if (!AddAlias(nodeLabel, namedExprNode)) {
+ return false;
+ }
+ namedExprNode = BuildColumn(namedExprNode->GetPos(), nodeLabel);
+ }
+ }
+ GroupBySet.emplace_back(std::move(namedExprNode));
+ return true;
+}
+
+bool TGroupByClause::OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node) {
+ if (!OrdinaryGroupingSet(node.GetRule_ordinary_grouping_set1())) {
+ return false;
+ }
+ for (auto& block: node.GetBlock2()) {
+ if (!OrdinaryGroupingSet(block.GetRule_ordinary_grouping_set2())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool TGroupByClause::HoppingWindow(const TRule_hopping_window_specification& node) {
+ if (HoppingWindowSpec) {
+ Ctx.Error() << "Duplicate hopping window specification.";
+ return false;
+ }
+ HoppingWindowSpec = new THoppingWindowSpec;
+ {
+ TSqlExpression expr(Ctx, Mode);
+ HoppingWindowSpec->TimeExtractor = expr.Build(node.GetRule_expr3());
+ if (!HoppingWindowSpec->TimeExtractor) {
+ return false;
+ }
+ }
+ auto processIntervalParam = [&] (const TRule_expr& rule) -> TNodePtr {
+ TSqlExpression expr(Ctx, Mode);
+ auto node = expr.Build(rule);
+ if (!node) {
+ return nullptr;
+ }
+
+ auto literal = node->GetLiteral("String");
+ if (!literal) {
+ return new TAstListNodeImpl(Ctx.Pos(), {
+ new TAstAtomNodeImpl(Ctx.Pos(), "EvaluateExpr", TNodeFlags::Default),
+ node
+ });
+ }
+
+ const auto out = NKikimr::NMiniKQL::ValueFromString(NKikimr::NUdf::EDataSlot::Interval, *literal);
+ if (!out) {
+ Ctx.Error(node->GetPos()) << "Expected interval in ISO 8601 format";
+ return nullptr;
+ }
+
+ if ('T' == literal->back()) {
+ Ctx.Warning(node->GetPos(), TIssuesIds::YQL_DEPRECATED_INTERVAL_CONSTANT) << "Time prefix 'T' at end of interval contant";
+ }
+
+ return new TAstListNodeImpl(Ctx.Pos(), {
+ new TAstAtomNodeImpl(Ctx.Pos(), "Interval", TNodeFlags::Default),
+ new TAstListNodeImpl(Ctx.Pos(), {
+ new TAstAtomNodeImpl(Ctx.Pos(), "quote", TNodeFlags::Default),
+ new TAstAtomNodeImpl(Ctx.Pos(), ToString(out.Get<i64>()), TNodeFlags::Default)
+ })
+ });
+ };
+
+ HoppingWindowSpec->Hop = processIntervalParam(node.GetRule_expr5());
+ if (!HoppingWindowSpec->Hop) {
+ return false;
+ }
+ HoppingWindowSpec->Interval = processIntervalParam(node.GetRule_expr7());
+ if (!HoppingWindowSpec->Interval) {
+ return false;
+ }
+ HoppingWindowSpec->Delay = processIntervalParam(node.GetRule_expr9());
+ if (!HoppingWindowSpec->Delay) {
+ return false;
+ }
+
+ return true;
+}
+
+bool TGroupByClause::IsNodeColumnsOrNamedExpression(const TVector<TNodePtr>& content, const TString& construction) const {
+ for (const auto& node: content) {
+ if (IsAutogenerated(node->GetColumnName())) {
+ Ctx.Error() << "You should use in " << construction << " either expression with required alias either column name or used alias.";
+ Ctx.IncrementMonCounter("sql_errors", "GroupBySetNoAliasOrColumn");
+ return false;
+ }
+ }
+ return true;
+}
+
+TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() {
+ return GroupSetContext->GroupFeatures;
+}
+
+const TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() const {
+ return GroupSetContext->GroupFeatures;
+}
+
+bool TGroupByClause::AddAlias(const TString& label, const TNodePtr& node) {
+ if (Aliases().contains(label)) {
+ Ctx.Error() << "Duplicated aliases not allowed";
+ Ctx.IncrementMonCounter("sql_errors", "GroupByDuplicateAliases");
+ return false;
+ }
+ Aliases().emplace(label, node);
+ return true;
+}
+
+TString TGroupByClause::GenerateGroupByExprName() {
+ return TStringBuilder() << AutogenerateNamePrefix << GroupSetContext->UnnamedCount++;
+}
+
+bool TGroupByClause::IsAutogenerated(const TString* name) const {
+ return name && name->StartsWith(AutogenerateNamePrefix);
+}
+
+TSourcePtr TSqlSelect::SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos) {
+ auto source = SelectKind(node.GetRule_select_kind1(), selectPos);
+ if (!source) {
+ return {};
+ }
+ TPosition startPos(Ctx.Pos());
+ /// LIMIT INTEGER block
+ TNodePtr skipTake;
+ if (node.HasBlock2()) {
+ auto block = node.GetBlock2();
+
+ Token(block.GetToken1());
+ TPosition pos(Ctx.Pos());
+
+ TSqlExpression takeExpr(Ctx, Mode);
+ auto take = takeExpr.Build(block.GetRule_expr2());
+ if (!take) {
+ return{};
+ }
+
+ TNodePtr skip;
+ if (block.HasBlock3()) {
+ TSqlExpression skipExpr(Ctx, Mode);
+ skip = skipExpr.Build(block.GetBlock3().GetRule_expr2());
+ if (!skip) {
+ return {};
+ }
+ if (Token(block.GetBlock3().GetToken1()) == ",") {
+ // LIMIT skip, take
+ skip.Swap(take);
+ Ctx.IncrementMonCounter("sql_features", "LimitSkipTake");
+ } else {
+ Ctx.IncrementMonCounter("sql_features", "LimitOffset");
+ }
+ }
+ skipTake = BuildSkipTake(pos, skip, take);
+ Ctx.IncrementMonCounter("sql_features", "Limit");
+ }
+ return BuildSelect(startPos, std::move(source), skipTake);
+}
+
+TSourcePtr TSqlSelect::SelectKind(const TRule_select_kind& node, TPosition& selectPos) {
+ const bool discard = node.HasBlock1();
+ const bool hasLabel = node.HasBlock3();
+ if ((discard || hasLabel) && (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW || Mode == NSQLTranslation::ESqlMode::SUBQUERY)) {
+ Ctx.Error() << "DISCARD and INTO RESULT are not allowed in current mode";
+ return {};
+ }
+
+ if (discard && hasLabel) {
+ Ctx.Error() << "DISCARD and INTO RESULT cannot be used at the same time";
+ return {};
+ }
+
+ if (discard && !selectPos) {
+ selectPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
+ }
+
+ TWriteSettings settings;
+ settings.Discard = discard;
+ if (hasLabel) {
+ settings.Label = PureColumnOrNamed(node.GetBlock3().GetRule_pure_column_or_named3(), *this);
+ }
+
+ TSourcePtr res;
+ switch (node.GetBlock2().Alt_case()) {
+ case TRule_select_kind_TBlock2::kAlt1:
+ res = ProcessCore(node.GetBlock2().GetAlt1().GetRule_process_core1(), settings, selectPos);
+ break;
+ case TRule_select_kind_TBlock2::kAlt2:
+ res = ReduceCore(node.GetBlock2().GetAlt2().GetRule_reduce_core1(), settings, selectPos);
+ break;
+ case TRule_select_kind_TBlock2::kAlt3:
+ res = SelectCore(node.GetBlock2().GetAlt3().GetRule_select_core1(), settings, selectPos);
+ break;
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+
+ return res;
+}
+
+TSourcePtr TSqlSelect::SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos) {
+ if (node.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis1) {
+ return SelectKind(node.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1(), selectPos);
+ } else {
+ return SelectKind(node.GetAlt_select_kind_parenthesis2().GetRule_select_kind_partial2(), selectPos);
+ }
+}
+
+TSourcePtr TSqlSelect::Build(const TRule_select_stmt& node, TPosition& selectPos) {
+ auto res = SelectKind(node.GetRule_select_kind_parenthesis1(), selectPos);
+ if (!res) {
+ return nullptr;
+ }
+
+ TPosition unionPos = selectPos; // Position of first select
+ TVector<TSourcePtr> sources;
+ sources.emplace_back(std::move(res));
+ for (auto& b: node.GetBlock2()) {
+ auto next = SelectKind(b.GetRule_select_kind_parenthesis2(), selectPos);
+ if (!next) {
+ return nullptr;
+ }
+ switch (b.GetRule_select_op1().Alt_case()) {
+ case TRule_select_op::kAltSelectOp1: {
+ const bool isUnionAll = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2();
+ if (!isUnionAll) {
+ Token(b.GetRule_select_op1().GetAlt_select_op1().GetToken1());
+ Ctx.Error() << "UNION without quantifier ALL is not supported yet. Did you mean UNION ALL?";
+ return nullptr;
+ } else {
+ sources.emplace_back(std::move(next));
+ }
+ break;
+ }
+ default:
+ Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet";
+ return nullptr;
+ }
+ }
+
+ if (sources.size() == 1) {
+ return std::move(sources[0]);
+ }
+
+ res = BuildUnionAll(unionPos, std::move(sources));
+ return res;
+}
+
+class TSqlIntoValues: public TSqlTranslation {
+public:
+ TSqlIntoValues(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {
+ }
+
+ TSourcePtr Build(const TRule_into_values_source& node, const TString& operationName);
+
+private:
+ bool BuildValuesRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow);
+ TSourcePtr ValuesSource(const TRule_values_source& node, TVector<TString>& columnsHint,
+ const TString& operationName);
+};
+
+TSourcePtr TSqlIntoValues::Build(const TRule_into_values_source& node, const TString& operationName) {
+ switch (node.Alt_case()) {
+ case TRule_into_values_source::kAltIntoValuesSource1: {
+ auto alt = node.GetAlt_into_values_source1();
+ TVector<TString> columnsHint;
+ if (alt.HasBlock1()) {
+ PureColumnListStr(alt.GetBlock1().GetRule_pure_column_list1(), *this, columnsHint);
+ }
+ return ValuesSource(alt.GetRule_values_source2(), columnsHint, operationName);
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "DefaultValuesOrOther");
+ AltNotImplemented("into_values_source", node);
+ return nullptr;
+ }
+}
+
+bool TSqlIntoValues::BuildValuesRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow){
+ TSqlExpression sqlExpr(Ctx, Mode);
+ return ExprList(sqlExpr, outRow, inRow.GetRule_expr_list2());
+}
+
+TSourcePtr TSqlIntoValues::ValuesSource(const TRule_values_source& node, TVector<TString>& columnsHint,
+ const TString& operationName)
+{
+ Ctx.IncrementMonCounter("sql_features", "ValuesSource");
+ TPosition pos(Ctx.Pos());
+ switch (node.Alt_case()) {
+ case TRule_values_source::kAltValuesSource1: {
+ TVector<TVector<TNodePtr>> rows {{}};
+ const auto& rowList = node.GetAlt_values_source1().GetRule_values_source_row_list2();
+
+ if (!BuildValuesRow(rowList.GetRule_values_source_row1(), rows.back())) {
+ return nullptr;
+ }
+
+ for (const auto& valuesSourceRow: rowList.GetBlock2()) {
+ rows.push_back({});
+ if (!BuildValuesRow(valuesSourceRow.GetRule_values_source_row2(), rows.back())) {
+ return nullptr;
+ }
+ }
+
+ return BuildWriteValues(pos, operationName, columnsHint, rows);
+ }
+ case TRule_values_source::kAltValuesSource2: {
+ TSqlSelect select(Ctx, Mode);
+ TPosition selectPos;
+ auto source = select.Build(node.GetAlt_values_source2().GetRule_select_stmt1(), selectPos);
+ if (!source) {
+ return nullptr;
+ }
+ return BuildWriteValues(pos, "UPDATE", columnsHint, std::move(source));
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownValuesSource");
+ AltNotImplemented("values_source", node);
+ return nullptr;
+ }
+}
+
+class TSqlIntoTable: public TSqlTranslation {
+public:
+ TSqlIntoTable(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {
+ }
+
+ TNodePtr Build(const TRule_into_table_stmt& node);
+
+private:
+ //bool BuildValuesRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow);
+ //TSourcePtr ValuesSource(const TRule_values_source& node, TVector<TString>& columnsHint);
+ //TSourcePtr IntoValuesSource(const TRule_into_values_source& node);
+
+ bool ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table, ESQLWriteColumnMode mode,
+ const TPosition& pos);
+ TString SqlIntoModeStr;
+ TString SqlIntoUserModeStr;
+};
+
+TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) {
+ static const TMap<TString, ESQLWriteColumnMode> str2Mode = {
+ {"InsertInto", ESQLWriteColumnMode::InsertInto},
+ {"InsertOrAbortInto", ESQLWriteColumnMode::InsertOrAbortInto},
+ {"InsertOrIgnoreInto", ESQLWriteColumnMode::InsertOrIgnoreInto},
+ {"InsertOrRevertInto", ESQLWriteColumnMode::InsertOrRevertInto},
+ {"UpsertInto", ESQLWriteColumnMode::UpsertInto},
+ {"ReplaceInto", ESQLWriteColumnMode::ReplaceInto},
+ {"InsertIntoWithTruncate", ESQLWriteColumnMode::InsertIntoWithTruncate}
+ };
+
+ auto& modeBlock = node.GetBlock1();
+
+ TVector<TToken> modeTokens;
+ switch (modeBlock.Alt_case()) {
+ case TRule_into_table_stmt_TBlock1::AltCase::kAlt1:
+ modeTokens = {modeBlock.GetAlt1().GetToken1()};
+ break;
+ case TRule_into_table_stmt_TBlock1::AltCase::kAlt2:
+ modeTokens = {
+ modeBlock.GetAlt2().GetToken1(),
+ modeBlock.GetAlt2().GetToken2(),
+ modeBlock.GetAlt2().GetToken3()
+ };
+ break;
+ case TRule_into_table_stmt_TBlock1::AltCase::kAlt3:
+ modeTokens = {
+ modeBlock.GetAlt3().GetToken1(),
+ modeBlock.GetAlt3().GetToken2(),
+ modeBlock.GetAlt3().GetToken3()
+ };
+ break;
+ case TRule_into_table_stmt_TBlock1::AltCase::kAlt4:
+ modeTokens = {
+ modeBlock.GetAlt4().GetToken1(),
+ modeBlock.GetAlt4().GetToken2(),
+ modeBlock.GetAlt4().GetToken3()
+ };
+ break;
+ case TRule_into_table_stmt_TBlock1::AltCase::kAlt5:
+ modeTokens = {modeBlock.GetAlt5().GetToken1()};
+ break;
+ case TRule_into_table_stmt_TBlock1::AltCase::kAlt6:
+ modeTokens = {modeBlock.GetAlt6().GetToken1()};
+ break;
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+
+ TVector<TString> modeStrings;
+ modeStrings.reserve(modeTokens.size());
+ TVector<TString> userModeStrings;
+ userModeStrings.reserve(modeTokens.size());
+
+ for (auto& token : modeTokens) {
+ auto tokenStr = Token(token);
+
+ auto modeStr = tokenStr;
+ modeStr.to_lower();
+ modeStr.to_upper(0, 1);
+ modeStrings.push_back(modeStr);
+
+ auto userModeStr = tokenStr;
+ userModeStr.to_upper();
+ userModeStrings.push_back(userModeStr);
+ }
+
+ modeStrings.push_back("Into");
+ userModeStrings.push_back("INTO");
+
+ SqlIntoModeStr = JoinRange("", modeStrings.begin(), modeStrings.end());
+ SqlIntoUserModeStr = JoinRange(" ", userModeStrings.begin(), userModeStrings.end());
+
+ auto intoTableRef = node.GetRule_into_simple_table_ref3();
+ auto tableRef = intoTableRef.GetRule_simple_table_ref1();
+
+ auto cluster = Ctx.CurrCluster;
+ std::pair<bool, TDeferredAtom> nameOrAt;
+ if (tableRef.HasBlock1()) {
+ switch (tableRef.GetBlock1().Alt_case()) {
+ case TRule_simple_table_ref_TBlock1::AltCase::kAlt1: {
+ cluster = OptIdPrefixAsClusterStr(tableRef.GetBlock1().GetAlt1().GetBlock1().GetRule_opt_id_prefix1(), *this, Ctx.CurrCluster);
+ if (!cluster && Ctx.CurrCluster) {
+ return nullptr;
+ }
+ auto id = Id(tableRef.GetBlock1().GetAlt1().GetBlock1().GetRule_id_or_at2(), *this);
+ nameOrAt = std::make_pair(id.first, TDeferredAtom(Ctx.Pos(), id.second));
+ break;
+ }
+ case TRule_simple_table_ref_TBlock1::AltCase::kAlt2: {
+ auto at = tableRef.GetBlock1().GetAlt2().HasBlock1();
+ auto named = GetNamedNode(NamedNodeImpl(tableRef.GetBlock1().GetAlt2().GetRule_bind_parameter2(), *this));
+ if (!named) {
+ return nullptr;
+ }
+
+ TDeferredAtom table;
+ if (!TryMakeClusterAndTableFromExpression(named, cluster, table, Ctx)) {
+ Ctx.Error() << "Cannot infer cluster and table name";
+ return nullptr;
+ }
+
+ cluster = cluster.empty() ? Ctx.CurrCluster : cluster;
+ nameOrAt = std::make_pair(at, table);
+ break;
+ }
+ default:
+ Y_ABORT("You should change implementation according grammar changes");
+ }
+ }
+
+ bool withTruncate = false;
+ if (tableRef.HasBlock2()) {
+ auto hints = TableHintsImpl(tableRef.GetBlock2().GetRule_table_hints1(), *this);
+ for (const auto& hint : hints) {
+ if (to_upper(hint) == "TRUNCATE") {
+ withTruncate = true;
+ } else {
+ Ctx.Error() << "Unsupported hint: " << hint;
+ return nullptr;
+ }
+ }
+ }
+
+ TVector<TString> eraseColumns;
+ if (intoTableRef.HasBlock2()) {
+ auto service = Ctx.GetClusterProvider(cluster);
+ if (!service) {
+ Ctx.Error() << "Unknown cluster name: " << cluster;
+ return nullptr;
+ }
+
+ if (*service != StatProviderName) {
+ Ctx.Error() << "ERASE BY is unsupported for " << *service;
+ return nullptr;
+ }
+
+ PureColumnListStr(
+ intoTableRef.GetBlock2().GetRule_pure_column_list3(), *this, eraseColumns
+ );
+ }
+
+ if (withTruncate) {
+ if (SqlIntoModeStr != "InsertInto") {
+ Error() << "Unable " << SqlIntoUserModeStr << " with truncate mode";
+ return nullptr;
+ }
+ SqlIntoModeStr += "WithTruncate";
+ SqlIntoUserModeStr += " ... WITH TRUNCATE";
+ }
+ const auto iterMode = str2Mode.find(SqlIntoModeStr);
+ YQL_ENSURE(iterMode != str2Mode.end(), "Invalid sql write mode string: " << SqlIntoModeStr);
+ const auto SqlIntoMode = iterMode->second;
+
+ TPosition pos(Ctx.Pos());
+ TNodePtr tableKey = BuildTableKey(pos, cluster, nameOrAt.second, nameOrAt.first ? "@" : "");
+
+ TTableRef table(Ctx.MakeName("table"), cluster, tableKey);
+ Ctx.IncrementMonCounter("sql_insert_clusters", table.Cluster);
+
+ auto values = TSqlIntoValues(Ctx, Mode).Build(node.GetRule_into_values_source4(), SqlIntoUserModeStr);
+ if (!values) {
+ return nullptr;
+ }
+ if (!ValidateServiceName(node, table, SqlIntoMode, GetPos(modeTokens[0]))) {
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", SqlIntoModeStr);
+
+ TNodePtr options;
+ if (eraseColumns) {
+ options = BuildEraseColumns(pos, std::move(eraseColumns));
+ }
+
+ return BuildWriteColumns(pos, table, ToWriteColumnsMode(SqlIntoMode), std::move(values), std::move(options));
+}
+
+bool TSqlIntoTable::ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table,
+ ESQLWriteColumnMode mode, const TPosition& pos) {
+ Y_UNUSED(node);
+ if (!table.Check(Ctx)) {
+ return false;
+ }
+ auto serviceName = to_lower(table.ServiceName(Ctx));
+ const bool isMapReduce = serviceName == YtProviderName;
+ const bool isKikimr = serviceName == KikimrProviderName;
+ const bool isRtmr = serviceName == RtmrProviderName;
+ const bool isStat = serviceName == StatProviderName;
+
+ if (!isKikimr) {
+ if (mode == ESQLWriteColumnMode::InsertOrAbortInto ||
+ mode == ESQLWriteColumnMode::InsertOrIgnoreInto ||
+ mode == ESQLWriteColumnMode::InsertOrRevertInto ||
+ mode == ESQLWriteColumnMode::UpsertInto && !isStat)
+ {
+ Ctx.Error(pos) << SqlIntoUserModeStr << " is not supported for " << serviceName << " tables";
+ Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName);
+ return false;
+ }
+ }
+
+ if (isMapReduce) {
+ if (mode == ESQLWriteColumnMode::ReplaceInto) {
+ Ctx.Error(pos) << "Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... for " << serviceName;
+ Ctx.IncrementMonCounter("sql_errors", "ReplaceIntoConflictUsage");
+ return false;
+ }
+ } else if (isKikimr) {
+ if (mode == ESQLWriteColumnMode::InsertIntoWithTruncate) {
+ Ctx.Error(pos) << "INSERT INTO WITH TRUNCATE is not supported for " << serviceName << " tables";
+ Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName);
+ return false;
+ }
+ } else if (isRtmr) {
+ if (mode != ESQLWriteColumnMode::InsertInto) {
+ Ctx.Error(pos) << SqlIntoUserModeStr << " is unsupported for " << serviceName;
+ Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName);
+ return false;
+ }
+ } else if (isStat) {
+ if (mode != ESQLWriteColumnMode::UpsertInto) {
+ Ctx.Error(pos) << SqlIntoUserModeStr << " is unsupported for " << serviceName;
+ Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+class TSqlQuery: public TSqlTranslation {
+public:
+ TSqlQuery(TContext& ctx, NSQLTranslation::ESqlMode mode, bool topLevel)
+ : TSqlTranslation(ctx, mode)
+ , TopLevel(topLevel)
+ {
+ }
+
+ TNodePtr Build(const TSQLParserAST& ast);
+
+ bool Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& core);
+private:
+ bool DeclareStatement(const TRule_declare_stmt& stmt);
+ bool ExportStatement(const TRule_export_stmt& stmt);
+ bool AlterTableAddColumns(TVector<TNodePtr>& blocks, const TRule_alter_table_add_column& node, const TTableRef& tr);
+ bool AlterTableDropColumn(TVector<TNodePtr>& blocks, const TRule_alter_table_drop_column& node, const TTableRef& tr);
+ TNodePtr PragmaStatement(const TRule_pragma_stmt& stmt, bool& success);
+ void AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node);
+
+ TNodePtr Build(const TRule_delete_stmt& stmt);
+
+ TNodePtr Build(const TRule_update_stmt& stmt);
+ TSourcePtr Build(const TRule_set_clause_choice& stmt);
+ bool FillSetClause(const TRule_set_clause& node, TVector<TString>& targetList, TVector<TNodePtr>& values);
+ TSourcePtr Build(const TRule_set_clause_list& stmt);
+ TSourcePtr Build(const TRule_multiple_column_assignment& stmt);
+ TNodePtr FlexType(TTranslation& ctx, const TRule_flex_type& node);
+
+ template<class TNode>
+ void ParseStatementName(const TNode& node, TString& internalStatementName, TString& humanStatementName) {
+ internalStatementName.clear();
+ humanStatementName.clear();
+ const auto& descr = AltDescription(node);
+ TVector<TString> parts;
+ const auto pos = descr.find(": ");
+ Y_DEBUG_ABORT_UNLESS(pos != TString::npos);
+ Split(TString(descr.begin() + pos + 2, descr.end()), "_", parts);
+ Y_DEBUG_ABORT_UNLESS(parts.size() > 1);
+ parts.pop_back();
+ for (auto& part: parts) {
+ part.to_upper(0, 1);
+ internalStatementName += part;
+ if (!humanStatementName.empty()) {
+ humanStatementName += ' ';
+ }
+ humanStatementName += to_upper(part);
+ }
+ }
+
+ const bool TopLevel;
+};
+
+void TSqlQuery::AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node) {
+ blocks.emplace_back(node);
+}
+
+
+bool TSqlQuery::Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& core) {
+ TString internalStatementName;
+ TString humanStatementName;
+ ParseStatementName(core, internalStatementName, humanStatementName);
+ const auto& altCase = core.Alt_case();
+ if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && (altCase >= TRule_sql_stmt_core::kAltSqlStmtCore4 &&
+ altCase != TRule_sql_stmt_core::kAltSqlStmtCore13)) {
+ Error() << humanStatementName << " statement is not supported in limited views";
+ return false;
+ }
+
+ if (Mode == NSQLTranslation::ESqlMode::SUBQUERY && (altCase >= TRule_sql_stmt_core::kAltSqlStmtCore4 &&
+ altCase != TRule_sql_stmt_core::kAltSqlStmtCore13 && altCase != TRule_sql_stmt_core::kAltSqlStmtCore6 &&
+ altCase != TRule_sql_stmt_core::kAltSqlStmtCore17)) {
+ Error() << humanStatementName << " statement is not supported in subqueries";
+ return false;
+ }
+ switch (altCase) {
+ case TRule_sql_stmt_core::kAltSqlStmtCore1: {
+ bool success = false;
+ TNodePtr nodeExpr = PragmaStatement(core.GetAlt_sql_stmt_core1().GetRule_pragma_stmt1(), success);
+ if (!success) {
+ return false;
+ }
+ if (nodeExpr) {
+ AddStatementToBlocks(blocks, nodeExpr);
+ }
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore2: {
+ Ctx.BodyPart();
+ TSqlSelect select(Ctx, Mode);
+ TPosition pos;
+ auto source = select.Build(core.GetAlt_sql_stmt_core2().GetRule_select_stmt1(), pos);
+ if (!source) {
+ return false;
+ }
+ blocks.emplace_back(BuildSelectResult(pos, std::move(source),
+ Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW && Mode != NSQLTranslation::ESqlMode::SUBQUERY, Mode == NSQLTranslation::ESqlMode::SUBQUERY));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore3: {
+ Ctx.BodyPart();
+ TVector<TString> names;
+ auto nodeExpr = NamedNode(core.GetAlt_sql_stmt_core3().GetRule_named_nodes_stmt1(), names);
+ if (!nodeExpr) {
+ return false;
+ }
+ TVector<TNodePtr> nodes;
+ auto subquery = nodeExpr->GetSource();
+ if (subquery) {
+ const auto alias = Ctx.MakeName("subquerynode");
+ const auto ref = Ctx.MakeName("subquery");
+ blocks.push_back(BuildSubquery(subquery, alias, Mode == NSQLTranslation::ESqlMode::SUBQUERY, names.size() == 1 ? -1 : names.size()));
+ blocks.back()->SetLabel(ref);
+
+ for (size_t i = 0; i < names.size(); ++i) {
+ nodes.push_back(BuildSubqueryRef(blocks.back(), ref, names.size() == 1 ? -1 : i));
+ }
+ } else {
+ if (names.size() > 1) {
+ const auto ref = Ctx.MakeName("tie");
+ blocks.push_back(BuildTupleResult(nodeExpr, names.size()));
+ blocks.back()->SetLabel(ref);
+ for (size_t i = 0; i < names.size(); ++i) {
+ nodes.push_back(nodeExpr->Y("Nth", ref, nodeExpr->Q(ToString(i))));
+ }
+ } else {
+ nodes.push_back(std::move(nodeExpr));
+ }
+ }
+
+ for (size_t i = 0; i < names.size(); ++i) {
+ PushNamedNode(names[i], nodes[i]);
+ }
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore4: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core4().GetRule_create_table_stmt1();
+ TTableRef tr(SimpleTableRefImpl(rule.GetRule_simple_table_ref3(), Mode, *this));
+
+ TVector<TColumnSchema> columns;
+ TVector<TIdentifier> pkColumns;
+ TVector<TIdentifier> partitionByColumns;
+ TVector<std::pair<TIdentifier, bool>> orderByColumns;
+
+ if (!CreateTableEntry(rule.GetRule_create_table_entry5(), *this, columns, pkColumns, partitionByColumns, orderByColumns)) {
+ return false;
+ }
+ for (auto& block: rule.GetBlock6()) {
+ if (!CreateTableEntry(block.GetRule_create_table_entry2(), *this, columns, pkColumns, partitionByColumns, orderByColumns)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateTable(Ctx.Pos(), tr, columns, pkColumns, partitionByColumns, orderByColumns));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore5: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core5().GetRule_drop_table_stmt1();
+ if (rule.HasBlock3()) {
+ Context().Error(GetPos(rule.GetToken1())) << "IF EXISTS in " << humanStatementName
+ << " is not supported.";
+ return false;
+ }
+ TTableRef tr(SimpleTableRefImpl(rule.GetRule_simple_table_ref4(), Mode, *this));
+ AddStatementToBlocks(blocks, BuildDropTable(Ctx.Pos(), tr));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore6: {
+ const auto& rule = core.GetAlt_sql_stmt_core6().GetRule_use_stmt1();
+ Token(rule.GetToken1());
+ Ctx.CurrCluster = IdOrStringAsCluster(rule.GetRule_id_or_string2(), *this);
+ if (!Ctx.CurrCluster) {
+ return false;
+ }
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore7: {
+ Ctx.BodyPart();
+ TSqlIntoTable intoTable(Ctx, Mode);
+ TNodePtr block(intoTable.Build(core.GetAlt_sql_stmt_core7().GetRule_into_table_stmt1()));
+ if (!block) {
+ return false;
+ }
+ blocks.emplace_back(block);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore8: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core8().GetRule_commit_stmt1();
+ Token(rule.GetToken1());
+ blocks.emplace_back(BuildCommitClusters(Ctx.Pos()));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore9: {
+ Ctx.BodyPart();
+ auto updateNode = Build(core.GetAlt_sql_stmt_core9().GetRule_update_stmt1());
+ if (!updateNode) {
+ return false;
+ }
+ AddStatementToBlocks(blocks, updateNode);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore10: {
+ Ctx.BodyPart();
+ auto deleteNode = Build(core.GetAlt_sql_stmt_core10().GetRule_delete_stmt1());
+ if (!deleteNode) {
+ return false;
+ }
+ blocks.emplace_back(deleteNode);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore11: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core11().GetRule_rollback_stmt1();
+ Token(rule.GetToken1());
+ blocks.emplace_back(BuildRollbackClusters(Ctx.Pos()));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore12:
+ if (!DeclareStatement(core.GetAlt_sql_stmt_core12().GetRule_declare_stmt1())) {
+ return false;
+ }
+ break;
+ case TRule_sql_stmt_core::kAltSqlStmtCore13:
+ if (!ImportStatement(core.GetAlt_sql_stmt_core13().GetRule_import_stmt1())) {
+ return false;
+ }
+ break;
+ case TRule_sql_stmt_core::kAltSqlStmtCore14:
+ if (!ExportStatement(core.GetAlt_sql_stmt_core14().GetRule_export_stmt1())) {
+ return false;
+ }
+ break;
+ case TRule_sql_stmt_core::kAltSqlStmtCore15: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core15().GetRule_alter_table_stmt1();
+ TTableRef tr(SimpleTableRefImpl(rule.GetRule_simple_table_ref3(), Mode, *this));
+ const auto& ruleAction = rule.GetRule_alter_table_action4();
+ switch (ruleAction.Alt_case()) {
+ case TRule_alter_table_action::kAltAlterTableAction1: {
+ const auto& addRule = ruleAction.GetAlt_alter_table_action1().GetRule_alter_table_add_column1();
+ if (!AlterTableAddColumns(blocks, addRule, tr)) {
+ return false;
+ }
+ break;
+ }
+ case TRule_alter_table_action::kAltAlterTableAction2: {
+ const auto& dropRule = ruleAction.GetAlt_alter_table_action2().GetRule_alter_table_drop_column1();
+ if (!AlterTableDropColumn(blocks, dropRule, tr)) {
+ return false;
+ }
+ break;
+ }
+ default:
+ AltNotImplemented("alter_table_action", core);
+ return false;
+ }
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore16: {
+ Ctx.BodyPart();
+ auto node = DoStatement(core.GetAlt_sql_stmt_core16().GetRule_do_stmt1(), false);
+ if (!node) {
+ return false;
+ }
+
+ blocks.push_back(node);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore17: {
+ Ctx.BodyPart();
+ if (!DefineActionOrSubqueryStatement(core.GetAlt_sql_stmt_core17().GetRule_define_action_or_subquery_stmt1())) {
+ return false;
+ }
+
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore18: {
+ Ctx.BodyPart();
+ auto node = EvaluateIfStatement(core.GetAlt_sql_stmt_core18().GetRule_evaluate_if_stmt1());
+ if (!node) {
+ return false;
+ }
+
+ blocks.push_back(node);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore19: {
+ Ctx.BodyPart();
+ auto node = EvaluateForStatement(core.GetAlt_sql_stmt_core19().GetRule_evaluate_for_stmt1());
+ if (!node) {
+ return false;
+ }
+
+ blocks.push_back(node);
+ break;
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownStatement" + internalStatementName);
+ AltNotImplemented("sql_stmt_core", core);
+ return false;
+ }
+
+ Ctx.IncrementMonCounter("sql_features", internalStatementName);
+ return !Ctx.HasPendingErrors;
+}
+
+bool TSqlQuery::DeclareStatement(const TRule_declare_stmt& stmt) {
+ TNodePtr defaultValue;
+ if (stmt.HasBlock5()) {
+ TSqlExpression sqlExpr(Ctx, Mode);
+ if (!(defaultValue = sqlExpr.LiteralExpr(stmt.GetBlock5().GetRule_literal_value2()))) {
+ return false;
+ }
+ }
+ if (defaultValue) {
+ Error() << "DEFAULT value not supported yet";
+ return false;
+ }
+ if (!Ctx.IsParseHeading()) {
+ Error() << "DECLARE statement should be in beginning of query, but it's possible to use PRAGMA or USE before it";
+ return false;
+ }
+ const auto varName = NamedNodeImpl(stmt.GetRule_bind_parameter2(), *this);
+ const auto varPos = Ctx.Pos();
+ const auto typeNode = FlexType(*this, stmt.GetRule_flex_type4());
+ if (!typeNode) {
+ return false;
+ }
+ if (!Ctx.DeclareVariable(varName, typeNode)) {
+ return false;
+ }
+ PushNamedNode(varName, BuildAtom(varPos, varName));
+ return true;
+}
+
+bool TSqlQuery::ExportStatement(const TRule_export_stmt& stmt) {
+ if (Mode != NSQLTranslation::ESqlMode::LIBRARY || !TopLevel) {
+ Error() << "EXPORT statement should be used only in a library on the top level";
+ return false;
+ }
+
+ TVector<TString> bindNames;
+ if (!BindList(stmt.GetRule_bind_parameter_list2(), bindNames)) {
+ return false;
+ }
+ return Ctx.AddExports(bindNames);
+}
+
+bool TSqlQuery::AlterTableAddColumns(TVector<TNodePtr>& blocks, const TRule_alter_table_add_column& rule, const TTableRef& tr) {
+ TVector<TColumnSchema> columns;
+
+ columns.push_back(ColumnSchemaImpl(rule.GetRule_column_schema3(), *this));
+ for (const auto& block: rule.GetBlock4()) {
+ columns.push_back(ColumnSchemaImpl(block.GetRule_column_schema4(), *this));
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterTable(Ctx.Pos(), tr, columns, EAlterTableIntentnt::AddColumn));
+ return true;
+}
+
+bool TSqlQuery::AlterTableDropColumn(TVector<TNodePtr>& blocks, const TRule_alter_table_drop_column& node, const TTableRef& tr) {
+ TString name = Id(node.GetRule_id3(), *this);
+ TColumnSchema column(Ctx.Pos(), name, "", false, false);
+ AddStatementToBlocks(blocks, BuildAlterTable(Ctx.Pos(), tr, TVector<TColumnSchema>{column}, EAlterTableIntentnt::DropColumn));
+ return true;
+}
+
+TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success) {
+ success = false;
+ const TString& prefix = OptIdPrefixAsStr(stmt.GetRule_opt_id_prefix2(), *this);
+ const TString& lowerPrefix = to_lower(prefix);
+ const TString pragma(IdOrString(stmt.GetRule_id_or_string3(), *this));
+ TString normalizedPragma(pragma);
+ TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), normalizedPragma);
+ if (!normalizeError.Empty()) {
+ Error() << normalizeError->GetMessage();
+ Ctx.IncrementMonCounter("sql_errors", "NormalizePragmaError");
+ return {};
+ }
+
+ TVector<TDeferredAtom> values;
+ TVector<const TRule_pragma_value*> pragmaValues;
+ bool pragmaValueDefault = false;
+ if (stmt.GetBlock4().HasAlt1()) {
+ pragmaValues.push_back(&stmt.GetBlock4().GetAlt1().GetRule_pragma_value2());
+ }
+ else if (stmt.GetBlock4().HasAlt2()) {
+ pragmaValues.push_back(&stmt.GetBlock4().GetAlt2().GetRule_pragma_value2());
+ for (auto& additionalValue : stmt.GetBlock4().GetAlt2().GetBlock3()) {
+ pragmaValues.push_back(&additionalValue.GetRule_pragma_value2());
+ }
+ }
+
+ const bool withConfigure = prefix || normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "udf";
+ const bool hasLexicalScope = withConfigure || normalizedPragma == "classicdivision";
+ for (auto pragmaValue : pragmaValues) {
+ if (pragmaValue->HasAlt_pragma_value3()) {
+ values.push_back(TDeferredAtom(Ctx.Pos(), StringContent(Ctx, pragmaValue->GetAlt_pragma_value3().GetToken1().GetValue())));
+ }
+ else if (pragmaValue->HasAlt_pragma_value2()
+ && pragmaValue->GetAlt_pragma_value2().GetRule_id1().HasAlt_id2()
+ && "default" == Id(pragmaValue->GetAlt_pragma_value2().GetRule_id1(), *this))
+ {
+ pragmaValueDefault = true;
+ }
+ else if (withConfigure && pragmaValue->HasAlt_pragma_value5()) {
+ auto bindName = NamedNodeImpl(pragmaValue->GetAlt_pragma_value5().GetRule_bind_parameter1(), *this);
+ auto namedNode = GetNamedNode(bindName);
+ if (!namedNode) {
+ return {};
+ }
+
+ TDeferredAtom atom;
+ MakeTableFromExpression(Ctx, namedNode, atom);
+ values.push_back(atom);
+ } else {
+ Error() << "Expected string" << (withConfigure ? ", named parameter" : "") << " or 'default' keyword as pragma value for pragma: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ }
+
+ if (prefix.empty()) {
+ if (!TopLevel && !hasLexicalScope) {
+ Error() << "This pragma '" << pragma << "' is not allowed to be used in actions or subqueries";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+
+ if (normalizedPragma == "refselect") {
+ Ctx.PragmaRefSelect = true;
+ Ctx.IncrementMonCounter("sql_pragma", "RefSelect");
+ } else if (normalizedPragma == "sampleselect") {
+ Ctx.PragmaSampleSelect = true;
+ Ctx.IncrementMonCounter("sql_pragma", "SampleSelect");
+ } else if (normalizedPragma == "allowdotinalias") {
+ Ctx.PragmaAllowDotInAlias = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AllowDotInAlias");
+ } else if (normalizedPragma == "udf") {
+ if (values.size() != 1 || pragmaValueDefault) {
+ Error() << "Expected file alias as pragma value";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "udf");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "ImportUdfs", values, false);
+ } else if (normalizedPragma == "file") {
+ if (values.size() != 2U || pragmaValueDefault) {
+ Error() << "Expected file alias and url as pragma values";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "file");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "AddFileByUrl", values, false);
+ } else if (normalizedPragma == "folder") {
+ if (values.size() != 2U || pragmaValueDefault) {
+ Error() << "Expected folder alias as url as pragma values";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "folder");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "AddFolderByUrl", values, false);
+ } else if (normalizedPragma == "library") {
+ if (values.size() != 1) {
+ Error() << "Expected non-empty file alias";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+
+ Ctx.Libraries.emplace(*values.front().GetLiteral());
+ Ctx.IncrementMonCounter("sql_pragma", "library");
+ } else if (normalizedPragma == "inferscheme" || normalizedPragma == "inferschema") {
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_INFERSCHEME) << "PRAGMA InferScheme is deprecated, please use PRAGMA yt.InferSchema instead.";
+ Ctx.PragmaInferSchema = true;
+ Ctx.IncrementMonCounter("sql_pragma", "InferSchema");
+ } else if (normalizedPragma == "directread") {
+ Ctx.PragmaDirectRead = true;
+ Ctx.IncrementMonCounter("sql_pragma", "DirectRead");
+ } else if (normalizedPragma == "equijoin") {
+ Ctx.IncrementMonCounter("sql_pragma", "EquiJoin");
+ } else if (normalizedPragma == "autocommit") {
+ Ctx.PragmaAutoCommit = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AutoCommit");
+ } else if (normalizedPragma == "tablepathprefix") {
+ if (values.size() == 1) {
+ if (!Ctx.SetPathPrefix(*values[0].GetLiteral())) {
+ return {};
+ }
+ } else if (values.size() == 2) {
+ if (!Ctx.SetPathPrefix(*values[1].GetLiteral(), *values[0].GetLiteral())) {
+ return {};
+ }
+ } else {
+ Error() << "Expected path prefix or tuple of (Provider, PathPrefix) or"
+ << " (Cluster, PathPrefix) as pragma value";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "PathPrefix");
+ } else if (normalizedPragma == "groupbylimit") {
+ if (values.size() != 1 || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaGroupByLimit)) {
+ Error() << "Expected single unsigned integer argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "GroupByLimit");
+ } else if (normalizedPragma == "groupbycubelimit") {
+ if (values.size() != 1 || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaGroupByCubeLimit)) {
+ Error() << "Expected single unsigned integer argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "GroupByCubeLimit");
+ }
+ else if (normalizedPragma == "simplecolumns") {
+ Ctx.SimpleColumns = true;
+ Ctx.IncrementMonCounter("sql_pragma", "SimpleColumns");
+ }
+ else if (normalizedPragma == "disablesimplecolumns") {
+ Ctx.SimpleColumns = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableSimpleColumns");
+ } else if (normalizedPragma == "resultrowslimit") {
+ if (values.size() != 1 || !TryFromString(*values[0].GetLiteral(), Ctx.ResultRowsLimit)) {
+ Error() << "Expected single unsigned integer argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "ResultRowsLimit");
+ } else if (normalizedPragma == "resultsizelimit") {
+ if (values.size() != 1 || !TryFromString(*values[0].GetLiteral(), Ctx.ResultSizeLimit)) {
+ Error() << "Expected single unsigned integer argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "ResultSizeLimit");
+ } else if (normalizedPragma == "warning") {
+ if (values.size() != 2U || values.front().Empty() || values.back().Empty()) {
+ Error() << "Expected arguments <action>, <issueId> for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TString codePattern = *values[1].GetLiteral();
+ TString action = *values[0].GetLiteral();
+
+ TWarningRule rule;
+ TString parseError;
+ auto parseResult = TWarningRule::ParseFrom(codePattern, action, rule, parseError);
+ switch (parseResult) {
+ case TWarningRule::EParseResult::PARSE_OK:
+ Ctx.WarningPolicy.AddRule(rule);
+ if (Ctx.Settings.WarnOnV0 && codePattern == "*") {
+ // Add exception for YQL_DEPRECATED_V0_SYNTAX
+ TWarningRule defaultForDeprecatedV0;
+ YQL_ENSURE(TWarningRule::ParseFrom(ToString(TIssueCode(TIssuesIds::YQL_DEPRECATED_V0_SYNTAX)),
+ "default", defaultForDeprecatedV0,
+ parseError) == TWarningRule::EParseResult::PARSE_OK);
+ Ctx.WarningPolicy.AddRule(defaultForDeprecatedV0);
+ }
+ break;
+ case TWarningRule::EParseResult::PARSE_PATTERN_FAIL:
+ case TWarningRule::EParseResult::PARSE_ACTION_FAIL:
+ Ctx.Error() << parseError;
+ return {};
+ default:
+ Y_ENSURE(false, "Unknown parse result");
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "warning");
+ } else if (normalizedPragma == "greetings") {
+ if (values.size() > 1) {
+ Error() << "Not expect few arguments for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ } else if (values.empty()) {
+ values.emplace_back(TDeferredAtom(Ctx.Pos(), "Hello, world! And best wishes from the YQL Team!"));
+ }
+ Ctx.Info(Ctx.Pos()) << *values[0].GetLiteral();
+ } else if (normalizedPragma == "warningmsg") {
+ if (values.size() != 1) {
+ Error() << "Expected single string argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_PRAGMA_WARNING_MSG) << *values[0].GetLiteral();
+ } else if (normalizedPragma == "errormsg") {
+ if (values.size() != 1) {
+ Error() << "Expected single string argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.Error(Ctx.Pos()) << *values[0].GetLiteral();
+ } else if (normalizedPragma == "classicdivision") {
+ if (values.size() != 1 || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaClassicDivision)) {
+ Error() << "Expected single boolean argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "ClassicDivision");
+ } else if (normalizedPragma == "disableunordered") {
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_PRAGMA)
+ << "Use of deprecated DisableUnordered pragma. It will be dropped soon";
+ } else if (normalizedPragma == "pullupflatmapoverjoin") {
+ Ctx.PragmaPullUpFlatMapOverJoin = true;
+ Ctx.IncrementMonCounter("sql_pragma", "PullUpFlatMapOverJoin");
+ } else if (normalizedPragma == "disablepullupflatmapoverjoin") {
+ Ctx.PragmaPullUpFlatMapOverJoin = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisablePullUpFlatMapOverJoin");
+ } else if (normalizedPragma == "enablesystemcolumns") {
+ if (values.size() != 1 || !TryFromString(*values[0].GetLiteral(), Ctx.EnableSystemColumns)) {
+ Error() << "Expected single boolean argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "EnableSystemColumns");
+ } else {
+ Error() << "Unknown pragma: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "UnknownPragma");
+ return {};
+ }
+ } else {
+ if (lowerPrefix == "yson") {
+ if (!TopLevel) {
+ Error() << "This pragma '" << pragma << "' is not allowed to be used in actions";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ if (normalizedPragma == "autoconvert") {
+ Ctx.PragmaYsonAutoConvert = true;
+ success = true;
+ return {};
+ } else if (normalizedPragma == "strict") {
+ Ctx.PragmaYsonStrict = true;
+ success = true;
+ return {};
+ } else if (normalizedPragma == "disablestrict") {
+ Ctx.PragmaYsonStrict = false;
+ success = true;
+ return {};
+ } else {
+ Error() << "Unknown pragma: '" << pragma << "'";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ } else if (std::find(Providers.cbegin(), Providers.cend(), lowerPrefix) == Providers.cend()) {
+ if (!Ctx.HasCluster(lowerPrefix)) {
+ Error() << "Unknown pragma prefix: " << prefix << ", please use cluster name or one of provider " <<
+ JoinRange(", ", Providers.cbegin(), Providers.cend());
+ Ctx.IncrementMonCounter("sql_errors", "UnknownPragma");
+ return {};
+ }
+ }
+
+ if (normalizedPragma != "flags") {
+ if (values.size() > 1) {
+ Error() << "Expected at most one value in the pragma";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ } else {
+ if (pragmaValueDefault || values.size() < 1) {
+ Error() << "Expected at least one value in the pragma";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ }
+
+ success = true;
+ Ctx.IncrementMonCounter("sql_pragma", pragma);
+ return BuildPragma(Ctx.Pos(), lowerPrefix, normalizedPragma, values, pragmaValueDefault);
+ }
+ success = true;
+ return {};
+}
+
+TNodePtr TSqlQuery::Build(const TRule_delete_stmt& stmt) {
+ TTableRef table = SimpleTableRefImpl(stmt.GetRule_simple_table_ref3(), Mode, *this);
+ if (!table.Check(Ctx)) {
+ return nullptr;
+ }
+
+ auto serviceName = to_lower(table.ServiceName(Ctx));
+ const bool isKikimr = serviceName == KikimrProviderName;
+
+ if (!isKikimr) {
+ Ctx.Error(GetPos(stmt.GetToken1())) << "DELETE is unsupported for " << serviceName;
+ return nullptr;
+ }
+
+ TSourcePtr source = BuildTableSource(Ctx.Pos(), table, false);
+
+ if (stmt.HasBlock4()) {
+ switch (stmt.GetBlock4().Alt_case()) {
+ case TRule_delete_stmt_TBlock4::kAlt1: {
+ const auto& alt = stmt.GetBlock4().GetAlt1();
+
+ TSqlExpression sqlExpr(Ctx, Mode);
+ auto whereExpr = sqlExpr.Build(alt.GetRule_expr2());
+ if (!whereExpr) {
+ return nullptr;
+ }
+ source->AddFilter(Ctx, whereExpr);
+ break;
+ }
+
+ case TRule_delete_stmt_TBlock4::kAlt2: {
+ const auto& alt = stmt.GetBlock4().GetAlt2();
+
+ auto values = TSqlIntoValues(Ctx, Mode).Build(alt.GetRule_into_values_source2(), "DELETE ON");
+ if (!values) {
+ return nullptr;
+ }
+
+ return BuildWriteColumns(Ctx.Pos(), table, EWriteColumnMode::DeleteOn, std::move(values));
+ }
+
+ default:
+ return nullptr;
+ }
+ }
+
+ return BuildDelete(Ctx.Pos(), table, std::move(source));
+}
+
+TNodePtr TSqlQuery::Build(const TRule_update_stmt& stmt) {
+ TTableRef table = SimpleTableRefImpl(stmt.GetRule_simple_table_ref2(), Mode, *this);
+ if (!table.Check(Ctx)) {
+ return nullptr;
+ }
+
+ auto serviceName = to_lower(table.ServiceName(Ctx));
+ const bool isKikimr = serviceName == KikimrProviderName;
+
+ if (!isKikimr) {
+ Ctx.Error(GetPos(stmt.GetToken1())) << "UPDATE is unsupported for " << serviceName;
+ return nullptr;
+ }
+
+ switch (stmt.GetBlock3().Alt_case()) {
+ case TRule_update_stmt_TBlock3::kAlt1: {
+ const auto& alt = stmt.GetBlock3().GetAlt1();
+ TSourcePtr values = Build(alt.GetRule_set_clause_choice2());
+ auto source = BuildTableSource(Ctx.Pos(), table, false);
+
+ if (alt.HasBlock3()) {
+ TSqlExpression sqlExpr(Ctx, Mode);
+ auto whereExpr = sqlExpr.Build(alt.GetBlock3().GetRule_expr2());
+ if (!whereExpr) {
+ return nullptr;
+ }
+ source->AddFilter(Ctx, whereExpr);
+ }
+
+ return BuildUpdateColumns(Ctx.Pos(), table, std::move(values), std::move(source));
+ }
+
+ case TRule_update_stmt_TBlock3::kAlt2: {
+ const auto& alt = stmt.GetBlock3().GetAlt2();
+
+ auto values = TSqlIntoValues(Ctx, Mode).Build(alt.GetRule_into_values_source2(), "UPDATE ON");
+ if (!values) {
+ return nullptr;
+ }
+
+ return BuildWriteColumns(Ctx.Pos(), table, EWriteColumnMode::UpdateOn, std::move(values));
+ }
+
+ default:
+ return nullptr;
+ }
+}
+
+TSourcePtr TSqlQuery::Build(const TRule_set_clause_choice& stmt) {
+ switch (stmt.Alt_case()) {
+ case TRule_set_clause_choice::kAltSetClauseChoice1:
+ return Build(stmt.GetAlt_set_clause_choice1().GetRule_set_clause_list1());
+ case TRule_set_clause_choice::kAltSetClauseChoice2:
+ return Build(stmt.GetAlt_set_clause_choice2().GetRule_multiple_column_assignment1());
+ default:
+ AltNotImplemented("set_clause_choice", stmt);
+ return nullptr;
+ }
+}
+
+bool TSqlQuery::FillSetClause(const TRule_set_clause& node, TVector<TString>& targetList, TVector<TNodePtr>& values) {
+ targetList.push_back(ColumnNameAsSingleStr(*this, node.GetRule_set_target1().GetRule_column_name1()));
+ TSqlExpression sqlExpr(Ctx, Mode);
+ if (!Expr(sqlExpr, values, node.GetRule_expr3())) {
+ return false;
+ }
+ return true;
+}
+
+TSourcePtr TSqlQuery::Build(const TRule_set_clause_list& stmt) {
+ TVector<TString> targetList;
+ TVector<TNodePtr> values;
+ const TPosition pos(Ctx.Pos());
+ if (!FillSetClause(stmt.GetRule_set_clause1(), targetList, values)) {
+ return nullptr;
+ }
+ for (auto& block: stmt.GetBlock2()) {
+ if (!FillSetClause(block.GetRule_set_clause2(), targetList, values)) {
+ return nullptr;
+ }
+ }
+ Y_DEBUG_ABORT_UNLESS(targetList.size() == values.size());
+ return BuildUpdateValues(pos, targetList, values);
+}
+
+TSourcePtr TSqlQuery::Build(const TRule_multiple_column_assignment& stmt) {
+ TVector<TString> targetList;
+ FillTargetList(*this, stmt.GetRule_set_target_list1(), targetList);
+ auto simpleValuesNode = stmt.GetRule_simple_values_source4();
+ const TPosition pos(Ctx.Pos());
+ switch (simpleValuesNode.Alt_case()) {
+ case TRule_simple_values_source::kAltSimpleValuesSource1: {
+ TVector<TNodePtr> values;
+ TSqlExpression sqlExpr(Ctx, Mode);
+ if (!ExprList(sqlExpr, values, simpleValuesNode.GetAlt_simple_values_source1().GetRule_expr_list1())) {
+ return nullptr;
+ }
+ return BuildUpdateValues(pos, targetList, values);
+ }
+ case TRule_simple_values_source::kAltSimpleValuesSource2: {
+ TSqlSelect select(Ctx, Mode);
+ TPosition selectPos;
+ auto source = select.Build(simpleValuesNode.GetAlt_simple_values_source2().GetRule_select_stmt1(), selectPos);
+ if (!source) {
+ return nullptr;
+ }
+ return BuildWriteValues(pos, "UPDATE", targetList, std::move(source));
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownSimpleValuesSourceAlt");
+ AltNotImplemented("simple_values_source", simpleValuesNode);
+ return nullptr;
+ }
+}
+
+TNodePtr TSqlQuery::Build(const TSQLParserAST& ast) {
+ const auto& statements = ast.GetRule_sql_stmt_list();
+ TVector<TNodePtr> blocks;
+
+ if (Ctx.Settings.WarnOnV0) {
+ if (Ctx.Settings.V0WarnAsError->Allow()) {
+ Error() << "SQL v0 syntax is deprecated and no longer supported. Please switch to v1: https://clubs.at.yandex-team.ru/yql/2910";
+ return nullptr;
+ }
+
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_V0_SYNTAX) <<
+ "SQL v0 syntax is deprecated and will stop working soon. Consider switching to v1: https://clubs.at.yandex-team.ru/yql/2910";
+ }
+
+ if (Ctx.Settings.V0Behavior == NSQLTranslation::EV0Behavior::Report) {
+ AddStatementToBlocks(blocks, BuildPragma(TPosition(), "config", "flags", {
+ TDeferredAtom(TPosition(), "SQL"),
+ TDeferredAtom(TPosition(), "0")
+ }, false));
+ }
+ if (!Statement(blocks, statements.GetRule_sql_stmt1().GetRule_sql_stmt_core2())) {
+ return nullptr;
+ }
+ for (auto block: statements.GetBlock2()) {
+ if (!Statement(blocks, block.GetRule_sql_stmt2().GetRule_sql_stmt_core2())) {
+ return nullptr;
+ }
+ }
+
+ ui32 topLevelSelects = 0;
+ for (auto& block : blocks) {
+ if (block->IsSelect()) {
+ ++topLevelSelects;
+ }
+ }
+
+ if ((Mode == NSQLTranslation::ESqlMode::SUBQUERY || Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) && topLevelSelects != 1) {
+ Error() << "Strictly one select/process/reduce statement must be used in the "
+ << (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW ? "view" : "subquery");
+ return nullptr;
+ }
+
+ if (!Ctx.PragmaAutoCommit && Ctx.Settings.EndOfQueryCommit && IsQueryMode(Mode)) {
+ AddStatementToBlocks(blocks, BuildCommitClusters(Ctx.Pos()));
+ }
+ return BuildQuery(Ctx.Pos(), blocks, true);
+}
+
+TNodePtr TSqlQuery::FlexType(TTranslation& ctx, const TRule_flex_type& node) {
+ const auto& stringType = NSQLTranslationV0::FlexType(node, ctx);
+ auto res = TryBuildDataType(Ctx.Pos(), TypeByAlias(stringType.first, !stringType.second));
+ if (!res) {
+ res = BuildBuiltinFunc(Ctx, Ctx.Pos(), "ParseType", {BuildLiteralRawString(Ctx.Pos(), stringType.first)});
+ }
+ return res;
+}
+
+bool TSqlTranslation::DefineActionOrSubqueryStatement(const TRule_define_action_or_subquery_stmt& stmt) {
+ auto kind = Ctx.Token(stmt.GetToken2());
+ const bool isSubquery = to_lower(kind) == "subquery";
+ if (!isSubquery && Mode == NSQLTranslation::ESqlMode::SUBQUERY) {
+ Error() << "Definition of actions is not allowed in the subquery";
+ return false;
+ }
+
+ auto actionName = NamedNodeImpl(stmt.GetRule_bind_parameter3(), *this);
+ TVector<TString> argNames;
+ if (stmt.HasBlock5() && !BindList(stmt.GetBlock5().GetRule_bind_parameter_list1(), argNames)) {
+ return false;
+ }
+
+ auto saveNamedNodes = Ctx.NamedNodes;
+ for (const auto& arg : argNames) {
+ PushNamedNode(arg, BuildAtom(Ctx.Pos(), arg, NYql::TNodeFlags::Default));
+ }
+
+ auto saveCurrCluster = Ctx.CurrCluster;
+ auto savePragmaClassicDivision = Ctx.PragmaClassicDivision;
+ auto saveMode = Ctx.Settings.Mode;
+ if (isSubquery) {
+ Ctx.Settings.Mode = NSQLTranslation::ESqlMode::SUBQUERY;
+ }
+
+ TSqlQuery query(Ctx, Ctx.Settings.Mode, false);
+ TVector<TNodePtr> blocks;
+ const auto& body = stmt.GetRule_define_action_or_subquery_body8();
+ bool hasError = false;
+ for (const auto& nestedStmtItem : body.GetBlock1()) {
+ const auto& nestedStmt = nestedStmtItem.GetRule_sql_stmt_core1();
+ if (!query.Statement(blocks, nestedStmt)) {
+ hasError = true;
+ break;
+ }
+ }
+
+ ui32 topLevelSelects = 0;
+ for (auto& block : blocks) {
+ if (block->IsSelect()) {
+ ++topLevelSelects;
+ }
+ }
+
+ if (isSubquery && topLevelSelects != 1) {
+ Error() << "Strictly one select/process/reduce statement must be used in the subquery";
+ return false;
+ }
+
+ auto ret = !hasError ? BuildQuery(Ctx.Pos(), blocks, false) : nullptr;
+ Ctx.CurrCluster = saveCurrCluster;
+ Ctx.PragmaClassicDivision = savePragmaClassicDivision;
+ Ctx.NamedNodes = saveNamedNodes;
+ Ctx.Settings.Mode = saveMode;
+
+ if (!ret) {
+ return false;
+ }
+
+ TNodePtr blockNode = new TAstListNodeImpl(Ctx.Pos());
+ blockNode->Add("block");
+ blockNode->Add(blockNode->Q(ret));
+
+ TNodePtr params = new TAstListNodeImpl(Ctx.Pos());
+ params->Add("world");
+ for (const auto& arg : argNames) {
+ params->Add(arg);
+ }
+
+ auto lambda = BuildLambda(Ctx.Pos(), params, blockNode);
+ PushNamedNode(actionName, lambda);
+ return true;
+}
+
+TNodePtr TSqlTranslation::EvaluateIfStatement(const TRule_evaluate_if_stmt& stmt) {
+ TSqlExpression expr(Ctx, Mode);
+ auto exprNode = expr.Build(stmt.GetRule_expr3());
+ if (!exprNode) {
+ return {};
+ }
+
+ auto thenNode = DoStatement(stmt.GetRule_do_stmt4(), true);
+ if (!thenNode) {
+ return {};
+ }
+
+ TNodePtr elseNode;
+ if (stmt.HasBlock5()) {
+ elseNode = DoStatement(stmt.GetBlock5().GetRule_do_stmt2(), true);
+ if (!elseNode) {
+ return {};
+ }
+ }
+
+ return BuildEvaluateIfNode(Ctx.Pos(), exprNode, thenNode, elseNode);
+}
+
+TNodePtr TSqlTranslation::EvaluateForStatement(const TRule_evaluate_for_stmt& stmt) {
+ TSqlExpression expr(Ctx, Mode);
+ auto itemArgName = NamedNodeImpl(stmt.GetRule_bind_parameter3(), *this);
+
+ auto exprNode = expr.Build(stmt.GetRule_expr5());
+ if (!exprNode) {
+ return{};
+ }
+
+ PushNamedNode(itemArgName, new TAstAtomNodeImpl(Ctx.Pos(), itemArgName, TNodeFlags::Default));
+ auto bodyNode = DoStatement(stmt.GetRule_do_stmt6(), true, { itemArgName });
+ PopNamedNode(itemArgName);
+ if (!bodyNode) {
+ return{};
+ }
+
+ TNodePtr elseNode;
+ if (stmt.HasBlock7()) {
+ elseNode = DoStatement(stmt.GetBlock7().GetRule_do_stmt2(), true);
+ if (!elseNode) {
+ return{};
+ }
+ }
+
+ return BuildEvaluateForNode(Ctx.Pos(), exprNode, bodyNode, elseNode);
+}
+
+google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, TIssues& err, size_t maxErrors, google::protobuf::Arena* arena) {
+ YQL_ENSURE(arena);
+#if defined(_tsan_enabled_)
+ TGuard<TMutex> grd(SanitizerSQLTranslationMutex);
+#endif
+ NSQLTranslation::TErrorCollectorOverIssues collector(err, maxErrors, "");
+ NProtoAST::TProtoASTBuilder3<NALP::SQLParser, NALP::SQLLexer> builder(query, queryName, arena);
+ return builder.BuildAST(collector);
+}
+
+google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, google::protobuf::Arena* arena) {
+ YQL_ENSURE(arena);
+#if defined(_tsan_enabled_)
+ TGuard<TMutex> grd(SanitizerSQLTranslationMutex);
+#endif
+ NProtoAST::TProtoASTBuilder3<NALP::SQLParser, NALP::SQLLexer> builder(query, queryName, arena);
+ return builder.BuildAST(err);
+}
+
+TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx) {
+ const google::protobuf::Descriptor* d = protoAst.GetDescriptor();
+ if (d && d->name() != "TSQLParserAST") {
+ ctx.Error() << "Invalid AST structure: " << d->name() << ", expected TSQLParserAST";
+ return nullptr;
+ }
+ TSqlQuery query(ctx, ctx.Settings.Mode, true);
+ TNodePtr node(query.Build(static_cast<const TSQLParserAST&>(protoAst)));
+ try {
+ if (node && node->Init(ctx, nullptr)) {
+ return node->Translate(ctx);
+ }
+ } catch (const NProtoAST::TTooManyErrors&) {
+ // do not add error issue, no room for it
+ }
+
+ return nullptr;
+}
+
+void SqlASTToYqlImpl(NYql::TAstParseResult& res, const google::protobuf::Message& protoAst,
+ TContext& ctx) {
+ YQL_ENSURE(!ctx.Issues.Size());
+ res.Root = SqlASTToYql(protoAst, ctx);
+ res.Pool = std::move(ctx.Pool);
+ if (!res.Root) {
+ if (ctx.Issues.Size()) {
+ ctx.IncrementMonCounter("sql_errors", "AstToYqlError");
+ } else {
+ ctx.IncrementMonCounter("sql_errors", "AstToYqlSilentError");
+ ctx.Error() << "Error occurred on parse SQL query, but no error is collected" <<
+ ", please send this request over bug report into YQL interface or write on yql@ maillist";
+ }
+ }
+}
+
+NYql::TAstParseResult SqlASTToYql(const google::protobuf::Message& protoAst,
+ const NSQLTranslation::TTranslationSettings& settings)
+{
+ YQL_ENSURE(IsQueryMode(settings.Mode));
+ TAstParseResult res;
+ TContext ctx(settings, res.Issues);
+ SqlASTToYqlImpl(res, protoAst, ctx);
+ res.ActualSyntaxType = ESyntaxType::YQLv0;
+ return res;
+}
+
+
+NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules)
+{
+ TAstParseResult res;
+ TContext ctx(settings, res.Issues);
+ NSQLTranslation::TErrorCollectorOverIssues collector(res.Issues, settings.MaxErrors, settings.File);
+
+ google::protobuf::Message* ast(SqlAST(query, "query", collector, settings.Arena));
+ if (ast) {
+ SqlASTToYqlImpl(res, *ast, ctx);
+ } else {
+ ctx.IncrementMonCounter("sql_errors", "AstError");
+ }
+ if (warningRules) {
+ *warningRules = ctx.WarningPolicy.GetRules();
+ ctx.WarningPolicy.Clear();
+ }
+ res.ActualSyntaxType = NYql::ESyntaxType::YQLv0;
+ return res;
+}
+
+} // namespace NSQLTranslationV0