diff options
author | aneporada <aneporada@ydb.tech> | 2023-07-24 19:54:34 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2023-07-24 19:54:34 +0300 |
commit | a1262352a731dcdfc23df0bb13e6f04c464c4ea7 (patch) | |
tree | 20c5bf088ba7234c6ad5b24e9e47fbd8d07f42fb | |
parent | f2b5a3362738b5cc60ea626eac83022856851412 (diff) | |
download | ydb-a1262352a731dcdfc23df0bb13e6f04c464c4ea7.tar.gz |
Autogenerate column names on type annotation stage
initial
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_core.cpp | 184 | ||||
-rw-r--r-- | ydb/library/yql/providers/result/provider/yql_result_provider.cpp | 67 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/insert.cpp | 34 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/node.cpp | 54 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/node.h | 4 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/select.cpp | 28 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_ut.cpp | 8 |
7 files changed, 312 insertions, 67 deletions
diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index fe9febb89cf..66cc7ea24aa 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -9124,9 +9124,11 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - THashSet<TStringBuf> addedInProjectionFields; + THashSet<TString> addedInProjectionFields; TVector<const TItemExprType*> allItems; - for (auto& item : input->Child(1)->Children()) { + TVector<size_t> autoNameIndexes; + for (size_t i = 0; i < input->Child(1)->ChildrenSize(); ++i) { + auto item = input->Child(1)->Child(i); if (!item->IsCallable({"SqlProjectItem", "SqlProjectStarItem"})) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(item->Pos()), TStringBuilder() << "Expected SqlProjectItem or SqlProjectStarItem as argument")); @@ -9142,9 +9144,38 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } else { YQL_ENSURE(item->Child(1)->IsAtom()); const auto fieldName = item->Child(1)->Content(); - allItems.push_back(ctx.Expr.MakeType<TItemExprType>(fieldName, item->GetTypeAnn())); - addedInProjectionFields.emplace(fieldName); + if (item->ChildrenSize() == 4 && HasSetting(*item->Child(3), "autoName")) { + autoNameIndexes.push_back(i); + } else { + addedInProjectionFields.emplace(fieldName); + allItems.push_back(ctx.Expr.MakeType<TItemExprType>(fieldName, item->GetTypeAnn())); + } + } + } + + if (!autoNameIndexes.empty()) { + auto sqlProjectItems = input->Child(1)->ChildrenList(); + for (size_t nameSuffix = autoNameIndexes.front(), i = 0; i < autoNameIndexes.size(); ) { + TString autoName = "column" + ToString(nameSuffix); + if (!addedInProjectionFields.insert(autoName).second) { + ++nameSuffix; + continue; + } + + if (i + 1 != autoNameIndexes.size()) { + nameSuffix = autoNameIndexes[i + 1]; + } + + auto& sqlProjectItem = sqlProjectItems[autoNameIndexes[i++]]; + YQL_ENSURE(sqlProjectItem->IsCallable("SqlProjectItem")); + YQL_ENSURE(sqlProjectItem->ChildrenSize() == 4); + + sqlProjectItem = ctx.Expr.ChangeChild(*sqlProjectItem, 1, ctx.Expr.NewAtom(sqlProjectItem->Child(1)->Pos(), autoName)); + sqlProjectItem = ctx.Expr.ChangeChild(*sqlProjectItem, 3, RemoveSetting(*sqlProjectItem->Child(3), "autoName", ctx.Expr)); } + + output = ctx.Expr.ChangeChild(*input, 1, ctx.Expr.NewList(input->Child(1)->Pos(), std::move(sqlProjectItems))); + return IGraphTransformer::TStatus::Repeat; } TVector<TStringBuf> transparentFields; @@ -9201,7 +9232,6 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } IGraphTransformer::TStatus SqlProjectItemWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { - Y_UNUSED(output); YQL_ENSURE(input->IsCallable({"SqlProjectItem", "SqlProjectStarItem"})); const bool isStar = input->IsCallable("SqlProjectStarItem"); if (!EnsureMinMaxArgsCount(*input, 3, 4, ctx.Expr)) { @@ -9244,6 +9274,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } + bool warnShadow = false; if (input->ChildrenSize() == 4) { // validate options THashSet<TStringBuf> seenOptions; @@ -9281,7 +9312,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> if (!EnsureAtom(*optionNode->Child(1), ctx.Expr)) { return IGraphTransformer::TStatus::Error; } - } else if (!isStar && name == "warnShadow") { + } else if (!isStar && (name == "warnShadow" || name == "autoName")) { // no params if (!EnsureTupleSize(*optionNode, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -9299,23 +9330,39 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - if (seenOptions.contains("warnShadow")) { + if (seenOptions.contains("autoName")) { + if (seenOptions.contains("warnShadow")) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Child(3)->Pos()), + TStringBuilder() << "Options warnShadow and autoName cannot be used at the same time")); + return IGraphTransformer::TStatus::Error; + } auto alias = input->Child(1)->Content(); - if (itemType->Cast<TStructExprType>()->FindItem(alias)) { - auto issue = TIssue(ctx.Expr.GetPosition(input->Child(1)->Pos()), - TStringBuilder() << "Alias `" << alias << "` shadows column with the same name. It looks like comma is missed here. " - "If not, it is recommended to use ... AS `" << alias << "` to avoid confusion"); - SetIssueCode(EYqlIssueCode::TIssuesIds_EIssueCode_CORE_ALIAS_SHADOWS_COLUMN, issue); - if (!ctx.Expr.AddWarning(issue)) { - return IGraphTransformer::TStatus::Error; - } + if (!alias.empty()) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Child(1)->Pos()), + TStringBuilder() << "Non-empty name '" << alias << "' is used with autoName set")); + return IGraphTransformer::TStatus::Error; } - auto newChildren = input->ChildrenList(); - // drop options - newChildren.pop_back(); - output = ctx.Expr.ChangeChildren(*input, std::move(newChildren)); - return IGraphTransformer::TStatus::Repeat; } + + if (seenOptions.contains("warnShadow")) { + warnShadow = true; + } + } + + if (warnShadow) { + auto alias = input->Child(1)->Content(); + if (itemType->Cast<TStructExprType>()->FindItem(alias)) { + auto issue = TIssue(ctx.Expr.GetPosition(input->Child(1)->Pos()), + TStringBuilder() << "Alias `" << alias << "` shadows column with the same name. It looks like comma is missed here. " + "If not, it is recommended to use ... AS `" << alias << "` to avoid confusion"); + SetIssueCode(EYqlIssueCode::TIssuesIds_EIssueCode_CORE_ALIAS_SHADOWS_COLUMN, issue); + if (!ctx.Expr.AddWarning(issue)) { + return IGraphTransformer::TStatus::Error; + } + } + auto newOptions = RemoveSetting(*input->Child(3), "warnShadow", ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 3, std::move(newOptions)); + return IGraphTransformer::TStatus::Repeat; } auto& lambda = input->ChildRef(2); @@ -9342,6 +9389,100 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Ok; } + IGraphTransformer::TStatus SqlRenameWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { + if (!EnsureArgsCount(*input, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (IsEmptyList(input->Head())) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + const TTypeAnnotationNode* itemType = nullptr; + if (!EnsureNewSeqType<false>(input->Head(), ctx.Expr, &itemType)) { + return IGraphTransformer::TStatus::Error; + } + + if (!itemType) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Head().Pos()), + TStringBuilder() << "Expected Struct as a sequence item type, but got lambda")); + return IGraphTransformer::TStatus::Error; + } + + + if (!EnsureStructType(input->Head().Pos(), *itemType, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + const TStructExprType* structType = itemType->Cast<TStructExprType>(); + const ui32 numColumns = structType->GetSize(); + if (!EnsureTupleSize(*input->Child(1), numColumns, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureTupleOfAtoms(*input->Child(1), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + auto childColumnOrder = ctx.Types.LookupColumnOrder(input->Head()); + if (!childColumnOrder.Defined()) { + // somewhat ugly attempt to find SqlProject to obtain column order + auto currInput = input->HeadPtr(); + TString path = ToString(input->Content()); + while (currInput->IsCallable({"PersistableRepr", "SqlAggregateAll", "RemoveSystemMembers", "Sort"})) { + path = path + " -> " + ToString(currInput->Content()); + currInput = currInput->HeadPtr(); + } + if (!currInput->IsCallable({"SqlProject", "OrderedSqlProject"})) { + path = path + " -> " + ToString(currInput->Content()); + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Head().Pos()), + TStringBuilder() << "Failed to deduce column order for input - unable to locate SqlProject: " << path)); + return IGraphTransformer::TStatus::Error; + } + + childColumnOrder.ConstructInPlace(); + for (const auto& item : currInput->Child(1)->ChildrenList()) { + if (!item->IsCallable("SqlProjectItem")) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(item->Pos()), + TStringBuilder() << "Failed to deduce column order for input - star / qualified star is prosent in projection")); + return IGraphTransformer::TStatus::Error; + } + childColumnOrder->push_back(ToString(item->Child(1)->Content())); + } + + } + YQL_ENSURE(childColumnOrder->size() == numColumns); + + output = ctx.Expr.Builder(input->Pos()) + .Callable("AssumeColumnOrder") + .Callable(0, input->IsCallable("OrderedSqlRename") ? "OrderedMap" : "Map") + .Add(0, input->HeadPtr()) + .Lambda(1) + .Param("item") + .Callable("AsStruct") + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { + for (ui32 i = 0; i < numColumns; ++i) { + parent + .List(i) + .Add(0, input->Child(1)->ChildPtr(i)) + .Callable(1, "Member") + .Arg(0, "item") + .Atom(1, (*childColumnOrder)[i]) + .Seal() + .Seal(); + } + return parent; + }) + .Seal() + .Seal() + .Seal() + .Add(1, input->ChildPtr(1)) + .Seal() + .Build(); + return IGraphTransformer::TStatus::Repeat; + } + IGraphTransformer::TStatus SqlTypeFromYsonWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { if (!EnsureArgsCount(*input, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -11934,6 +12075,9 @@ template <NKikimr::NUdf::EDataSlot DataSlot> ExtFunctions["BlockMergeFinalizeHashed"] = &BlockMergeFinalizeHashedWrapper; ExtFunctions["BlockMergeManyFinalizeHashed"] = &BlockMergeFinalizeHashedWrapper; + ExtFunctions["SqlRename"] = &SqlRenameWrapper; + ExtFunctions["OrderedSqlRename"] = &SqlRenameWrapper; + Functions["AsRange"] = &AsRangeWrapper; Functions["RangeCreate"] = &RangeCreateWrapper; Functions["RangeEmpty"] = &RangeEmptyWrapper; diff --git a/ydb/library/yql/providers/result/provider/yql_result_provider.cpp b/ydb/library/yql/providers/result/provider/yql_result_provider.cpp index 42a541f5c58..f3476eb9517 100644 --- a/ydb/library/yql/providers/result/provider/yql_result_provider.cpp +++ b/ydb/library/yql/providers/result/provider/yql_result_provider.cpp @@ -96,18 +96,19 @@ namespace { IGraphTransformer::TStatus ValidateColumns(TExprNode::TPtr& columns, const TTypeAnnotationNode* listType, TExprContext& ctx) { bool hasPrefixes = false; + bool hasAutoNames = false; for (auto& child : columns->Children()) { if (HasError(child->GetTypeAnn(), ctx)) { return IGraphTransformer::TStatus::Error; } if (!child->IsAtom() && !child->IsList()) { - ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), "either atom or tuple with prefix is expected")); + ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), "either atom or tuple is expected")); return IGraphTransformer::TStatus::Error; } if (child->IsList()) { - if (!EnsureTupleSize(*child, 2, ctx)) { + if (!EnsureTupleMinSize(*child, 1, ctx)) { return IGraphTransformer::TStatus::Error; } @@ -115,17 +116,24 @@ namespace { return IGraphTransformer::TStatus::Error; } - if (!EnsureAtom(*child->Child(1), ctx)) { - return IGraphTransformer::TStatus::Error; - } - - if (child->Child(0)->Content() != "prefix") { + if (child->Child(0)->Content() == "prefix") { + if (!EnsureTupleSize(*child, 2, ctx)) { + return IGraphTransformer::TStatus::Error; + } + if (!EnsureAtom(*child->Child(1), ctx)) { + return IGraphTransformer::TStatus::Error; + } + hasPrefixes = true; + } else if (child->Child(0)->Content() == "auto") { + if (!EnsureTupleSize(*child, 1, ctx)) { + return IGraphTransformer::TStatus::Error; + } + hasAutoNames = true; + } else { ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << - "Expected 'prefix', but got: " << child->Child(0)->Content())); + "Expected 'prefix' or 'auto', but got: " << child->Child(0)->Content())); return IGraphTransformer::TStatus::Error; } - - hasPrefixes = true; } } @@ -147,27 +155,46 @@ namespace { auto structType = itemType->Cast<TStructExprType>(); TSet<TString> usedFields; TExprNode::TListType orderedFields; - for (auto& child : columns->Children()) { - TVector<TStringBuf> names; + for (size_t i = 0; i < columns->ChildrenSize(); ++i) { + auto child = columns->ChildPtr(i); if (child->IsAtom()) { orderedFields.push_back(child); if (!structType->FindItem(child->Content())) { + if (hasAutoNames) { + columns = {}; + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Unknown field in hint: " << child->Content())); return IGraphTransformer::TStatus::Error; } if (!usedFields.insert(TString(child->Content())).second) { + if (hasAutoNames) { + columns = {}; + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Duplicate field in hint: " << child->Content())); return IGraphTransformer::TStatus::Error; } + } else if (child->Child(0)->Content() == "auto") { + TString columnName = "column" + ToString(i); + if (!structType->FindItem(columnName) || !usedFields.insert(columnName).second) { + columns = {}; + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } + orderedFields.push_back(ctx.NewAtom(child->Pos(), columnName)); } else { auto prefix = child->Child(1)->Content(); for (auto& x : structType->GetItems()) { if (x->GetName().StartsWith(prefix)) { orderedFields.push_back(ctx.NewAtom(child->Pos(), x->GetName())); if (!usedFields.insert(TString(x->GetName())).second) { + if (hasAutoNames) { + columns = {}; + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Duplicate field in hint: " << x->GetName())); return IGraphTransformer::TStatus::Error; @@ -178,13 +205,17 @@ namespace { } if (usedFields.size() != structType->GetSize()) { + if (hasAutoNames) { + columns = {}; + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } ctx.AddError(TIssue(ctx.GetPosition(columns->Pos()), TStringBuilder() << "Mismatch of fields in hint and in the struct, columns fields: " << usedFields.size() << ", struct fields:" << structType->GetSize())); return IGraphTransformer::TStatus::Error; } - if (hasPrefixes) { + if (hasPrefixes || hasAutoNames) { columns = ctx.NewList(columns->Pos(), std::move(orderedFields)); return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); } @@ -1010,9 +1041,13 @@ namespace { auto status = ValidateColumns(columns, res.Data().Ref().GetTypeAnn(), ctx); if (status.Level != IGraphTransformer::TStatus::Ok) { if (status.Level == IGraphTransformer::TStatus::Repeat) { - auto newSetting = ctx.ChangeChild(*setting, 1, std::move(columns)); - auto newSettings = ctx.ChangeChild(*settings, settingPos, std::move(newSetting)); - output = ctx.ChangeChild(*input, 4, std::move(newSettings)); + if (!columns) { + output = ctx.ChangeChild(*input, 4, RemoveSetting(*input->Child(4), "columns", ctx)); + } else { + auto newSetting = ctx.ChangeChild(*setting, 1, std::move(columns)); + auto newSettings = ctx.ChangeChild(*settings, settingPos, std::move(newSetting)); + output = ctx.ChangeChild(*input, 4, std::move(newSettings)); + } } return status; diff --git a/ydb/library/yql/sql/v1/insert.cpp b/ydb/library/yql/sql/v1/insert.cpp index 00c63e3d24b..c17bb458c21 100644 --- a/ydb/library/yql/sql/v1/insert.cpp +++ b/ydb/library/yql/sql/v1/insert.cpp @@ -191,13 +191,18 @@ public: if (!Source->Init(ctx, src)) { return false; } - const auto& sourceColumns = Source->GetColumns(); - const auto numColumns = !ColumnsHint.empty() && sourceColumns ? sourceColumns->List.size() : 0; - if (ColumnsHint.size() != numColumns) { - ctx.Error(Pos) << "SELECT have " << numColumns << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size(); - return false; - } + const size_t numColumns = ColumnsHint.size(); if (numColumns) { + const auto sourceColumns = Source->GetColumns(); + if (!sourceColumns || sourceColumns->All || sourceColumns->QualifiedAll) { + return true; + } + + if (numColumns != sourceColumns->List.size()) { + ctx.Error(Pos) << "SELECT have " << numColumns << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size(); + return false; + } + TStringStream str; bool mismatchFound = false; for (size_t i = 0; i < numColumns; ++i) { @@ -223,18 +228,29 @@ public: TNodePtr Build(TContext& ctx) override { auto input = Source->Build(ctx); - if (ColumnsHint.empty() || !Source->GetColumns()) { + if (ColumnsHint.empty()) { return input; } + auto columns = Y(); + for (auto column: ColumnsHint) { + columns = L(columns, BuildQuotedAtom(Pos, column)); + } + const auto sourceColumns = Source->GetColumns(); + if (!sourceColumns || sourceColumns->All || sourceColumns->QualifiedAll || sourceColumns->HasUnnamed) { + // will try to resolve column mapping on type annotation stage + return Y("OrderedSqlRename", input, Q(columns)); + } + + YQL_ENSURE(sourceColumns->List.size() == ColumnsHint.size()); auto srcColumn = Source->GetColumns()->List.begin(); - auto structObj = Y("AsStruct"); // ordered struct + auto structObj = Y("AsStruct"); // ordered struct for (auto column: ColumnsHint) { structObj = L(structObj, Q(Y(BuildQuotedAtom(Pos, column), Y("Member", "row", BuildQuotedAtom(Pos, *srcColumn)) ))); ++srcColumn; } - return Y("OrderedMap", input, BuildLambda(Pos, Y("row"), structObj)); + return Y("AssumeColumnOrder", Y("OrderedMap", input, BuildLambda(Pos, Y("row"), structObj)), Q(columns)); } TNodePtr DoClone() const final { diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp index b7cc07e3de5..79c92301dca 100644 --- a/ydb/library/yql/sql/v1/node.cpp +++ b/ydb/library/yql/sql/v1/node.cpp @@ -871,7 +871,17 @@ TNodePtr BuildQuotedAtom(TPosition pos, const TString& content, ui32 flags) { return new TQuotedAtomNode(pos, content, flags); } -bool TColumns::Add(const TString* column, bool countHint, bool isArtificial, bool isReliable, bool hasName) { +TString TColumns::AddUnnamed() { + TString desiredResult = TStringBuilder() << "column" << List.size(); + if (!All) { + HasUnnamed = true; + List.emplace_back(); + NamedColumns.push_back(false); + } + return desiredResult; +} + +bool TColumns::Add(const TString* column, bool countHint, bool isArtificial, bool isReliable) { if (!column || *column == "*") { if (!countHint) { SetAll(); @@ -892,7 +902,7 @@ bool TColumns::Add(const TString* column, bool countHint, bool isArtificial, boo } if (std::find(List.begin(), List.end(), *column) == List.end()) { List.push_back(*column); - NamedColumns.push_back(hasName); + NamedColumns.push_back(true); } return inserted; } @@ -903,7 +913,18 @@ void TColumns::Merge(const TColumns& columns) { if (columns.All) { SetAll(); } else { - for (auto& c: columns.List) { + YQL_ENSURE(columns.List.size() == columns.NamedColumns.size()); + size_t myUnnamed = NamedColumns.size() - std::accumulate(NamedColumns.begin(), NamedColumns.end(), 0); + size_t otherUnnamed = 0; + for (size_t i = 0; i < columns.List.size(); ++i) { + auto& c = columns.List[i]; + if (!columns.NamedColumns[i]) { + if (++otherUnnamed > myUnnamed) { + AddUnnamed(); + ++myUnnamed; + } + continue; + } if (columns.Real.contains(c)) { Add(&c, false, false); } @@ -912,6 +933,7 @@ void TColumns::Merge(const TColumns& columns) { } } HasUnreliable |= columns.HasUnreliable; + HasUnnamed |= columns.HasUnnamed; } } @@ -933,9 +955,26 @@ void TColumns::SetPrefix(const TString& prefix) { void TColumns::SetAll() { All = true; + QualifiedAll = false; Real.clear(); List.clear(); Artificial.clear(); + NamedColumns.clear(); + HasUnnamed = HasUnreliable = false; +} + +namespace { + +bool MaybeAutogenerated(const TString& name) { + TStringBuf prefix = "column"; + if (!name.StartsWith(prefix)) { + return false; + } + + TString suffix = name.substr(prefix.size()); + return !suffix.empty() && AllOf(suffix, [](const auto c) { return std::isdigit(c); }); +} + } bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) { @@ -946,6 +985,13 @@ bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) { if (ctx.SimpleColumns) { return true; } + if (HasUnnamed) { + const auto dotPos = name.find_first_of("."); + TString suffix = (dotPos == TString::npos) ? name : name.substr(dotPos + 1); + if (MaybeAutogenerated(suffix)) { + return true; + } + } for (const auto& real: Real) { const auto pos = real.find_first_of("*"); if (pos == TString::npos) { @@ -955,6 +1001,8 @@ bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) { return true; } } + } else if (HasUnnamed && MaybeAutogenerated(name)) { + return true; } return false; } diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h index 6d540b8253b..f79a2d7d809 100644 --- a/ydb/library/yql/sql/v1/node.h +++ b/ydb/library/yql/sql/v1/node.h @@ -553,8 +553,10 @@ namespace NSQLTranslationV1 { bool All = false; bool QualifiedAll = false; bool HasUnreliable = false; + bool HasUnnamed = false; - bool Add(const TString* column, bool countHint, bool isArtificial = false, bool isReliable = true, bool hasName = true); + bool Add(const TString* column, bool countHint, bool isArtificial = false, bool isReliable = true); + TString AddUnnamed(); void Merge(const TColumns& columns); void SetPrefix(const TString& prefix); void SetAll(); diff --git a/ydb/library/yql/sql/v1/select.cpp b/ydb/library/yql/sql/v1/select.cpp index f90ad44beb7..d7b1e57c311 100644 --- a/ydb/library/yql/sql/v1/select.cpp +++ b/ydb/library/yql/sql/v1/select.cpp @@ -119,10 +119,8 @@ public: if (IsSubquery()) { /// should be not used? auto columnsPtr = Source->GetColumns(); - if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll)) { + if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) { Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0")))); - } else if (columnsPtr && columnsPtr->List.size() == 1) { - Node = Y("Member", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))), Q(columnsPtr->List.front())); } else { ctx.Error(Pos) << "Source used in expression should contain one concrete column"; return false; @@ -582,10 +580,8 @@ public: } if (src && Subquery->GetSource()->IsSelect()) { auto columnsPtr = &Columns; - if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll)) { + if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) { Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0")))); - } else if (columnsPtr && columnsPtr->List.size() == 1) { - Node = Y("Member", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))), Q(columnsPtr->List.front())); } else { ctx.Error(Pos) << "Source used in expression should contain one concrete column"; return false; @@ -1877,7 +1873,7 @@ private: } else if (column) { label = isJoin && source && *source ? DotJoin(*source, *column) : *column; } else { - label = TStringBuilder() << "column" << Columns.List.size(); + label = Columns.AddUnnamed(); hasName = false; if (ctx.WarnUnnamedColumns) { ctx.Warning(term->GetPos(), TIssuesIds::YQL_UNNAMED_COLUMN) @@ -1885,10 +1881,9 @@ private: } } } - if (!Columns.Add(&label, false, false, true, hasName)) { + if (hasName && !Columns.Add(&label, false, false, true)) { ctx.Error(Pos) << "Duplicate column: " << label; hasError = true; - continue; } } @@ -2073,6 +2068,7 @@ private: } auto column = Columns.List.begin(); + auto isNamedColumn = Columns.NamedColumns.begin(); for (auto& term: Terms) { auto sourceName = term->GetSourceName(); if (!term->IsAsterisk()) { @@ -2088,10 +2084,13 @@ private: lambdaPos = term->GetPos(); aliasPos = term->GetLabelPos() ? *term->GetLabelPos() : lambdaPos; } - auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *column), BuildLambda(lambdaPos, Y("row"), body, "res")); + auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *isNamedColumn ? *column : ""), BuildLambda(lambdaPos, Y("row"), body, "res")); if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) { projectItem = L(projectItem, Q(Y(Q(Y(Q("warnShadow")))))); } + if (!*isNamedColumn) { + projectItem = L(projectItem, Q(Y(Q(Y(Q("autoName")))))); + } sqlProjectArgs = L(sqlProjectArgs, projectItem); } else { auto terms = PrepareWithout(Y()); @@ -2121,6 +2120,7 @@ private: sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, *sourceName), BuildLambda(Pos, Y("row"), terms, "res"), Q(options))); } ++column; + ++isNamedColumn; } } @@ -2968,11 +2968,15 @@ public: auto columns = Source->GetColumns(); if (columns && !columns->All && !(columns->QualifiedAll && ctx.SimpleColumns)) { auto list = Y(); - for (auto& c: columns->List) { + YQL_ENSURE(columns->List.size() == columns->NamedColumns.size()); + for (size_t i = 0; i < columns->List.size(); ++i) { + auto& c = columns->List[i]; if (c.EndsWith('*')) { list = L(list, Q(Y(Q("prefix"), BuildQuotedAtom(Pos, c.substr(0, c.size() - 1))))); - } else { + } else if (columns->NamedColumns[i]) { list = L(list, BuildQuotedAtom(Pos, c)); + } else { + list = L(list, Q(Y(Q("auto")))); } } settings = L(settings, Q(Y(Q("columns"), Q(list)))); diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index ed8988c5b8c..19242aee27f 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -2860,15 +2860,11 @@ Y_UNIT_TEST_SUITE(SqlToYQLErrors) { } Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict1) { - NYql::TAstParseResult res = SqlToYql("select LENGTH(Value), key as column0 from plato.Input;"); - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: column0\n"); + UNIT_ASSERT(SqlToYql("select LENGTH(Value), key as column0 from plato.Input;").IsOk()); } Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict2) { - NYql::TAstParseResult res = SqlToYql("select key as column1, LENGTH(Value) from plato.Input;"); - UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: column1\n"); + UNIT_ASSERT(SqlToYql("select key as column1, LENGTH(Value) from plato.Input;").IsOk()); } Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnSimpleSelect) { |