diff options
author | a-romanov <Anton.Romanov@ydb.tech> | 2023-05-02 11:18:36 +0300 |
---|---|---|
committer | a-romanov <Anton.Romanov@ydb.tech> | 2023-05-02 11:18:36 +0300 |
commit | 354ca8487cc41a6c2156236afb656c72528f629c (patch) | |
tree | 4e40aed593dc32f390608765a27a3c4f05729638 | |
parent | 966d37c5d0af38a01513800a699b10e71d4ca05b (diff) | |
download | ydb-354ca8487cc41a6c2156236afb656c72528f629c.tar.gz |
YQL-8971 YQL-15555 Keep constraints with complicated paths.
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 13 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_list.cpp | 32 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_expr_constraint.cpp | 34 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_expr_type_annotation.cpp | 101 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_expr_type_annotation.h | 7 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_opt_utils.cpp | 13 |
6 files changed, 142 insertions, 58 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 34a764b64d..189d86adfa 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -83,7 +83,12 @@ public: }; bool CanRewriteToEmptyContainer(const TExprNode& src) { - if (auto multi = src.GetConstraint<TMultiConstraintNode>()) { + if (src.GetConstraint<TPartOfSortedConstraintNode>() || + src.GetConstraint<TPartOfChoppedConstraintNode>() || + src.GetConstraint<TPartOfUniqueConstraintNode>() || + src.GetConstraint<TPartOfDistinctConstraintNode>()) + return false; + if (const auto multi = src.GetConstraint<TMultiConstraintNode>()) { for (auto& item: multi->GetItems()) { for (auto c: item.second.GetAllConstraints()) { if (c->GetName() != TEmptyConstraintNode::Name()) { @@ -5341,14 +5346,14 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { if (const auto& inputToCheck = SkipCallables(node->Head(), SkippableCallables); IsEmptyContainer(inputToCheck) || IsEmpty(inputToCheck, *optCtx.Types)) { YQL_CLOG(DEBUG, Core) << "Empty " << node->Content(); - return ctx.Builder(node->Pos()) + return KeepConstraints(ctx.Builder(node->Pos()) .Callable(ETypeAnnotationKind::Flow == node->GetTypeAnn()->GetKind() ? "ToFlow" : "ToStream") .Callable(0, "Just") .Callable(0, "Dict") .Add(0, ExpandType(node->Pos(), GetSeqItemType(*node->GetTypeAnn()), ctx)) .Seal() .Seal() - .Seal().Build(); + .Seal().Build(), *node, ctx); } return node; @@ -5357,7 +5362,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { map["ToDict"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { if (const auto& inputToCheck = SkipCallables(node->Head(), SkippableCallables); IsEmptyContainer(inputToCheck) || IsEmpty(inputToCheck, *optCtx.Types)) { YQL_CLOG(DEBUG, Core) << "Empty " << node->Content(); - return ctx.NewCallable(inputToCheck.Pos(), "Dict", {ExpandType(node->Pos(), *node->GetTypeAnn(), ctx)}); + return KeepConstraints(ctx.NewCallable(inputToCheck.Pos(), "Dict", {ExpandType(node->Pos(), *node->GetTypeAnn(), ctx)}), *node, ctx); } if (node->Head().IsCallable("AsList") && node->Child(2)->Child(1)->IsCallable("Void")) { diff --git a/ydb/library/yql/core/type_ann/type_ann_list.cpp b/ydb/library/yql/core/type_ann/type_ann_list.cpp index d0855abdba..4af726c03f 100644 --- a/ydb/library/yql/core/type_ann/type_ann_list.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_list.cpp @@ -65,8 +65,8 @@ namespace { elemsCount = 1; } - auto atom0 = ctx.NewAtom(node.Pos(), "0"); - auto atom1 = ctx.NewAtom(node.Pos(), "1"); + auto atom0 = ctx.NewAtom(node.Pos(), 0U); + auto atom1 = ctx.NewAtom(node.Pos(), 1U); TExprNode::TPtr initArg1 = ctx.NewArgument(node.Pos(), "item"); TExprNode::TPtr initArg2 = ctx.NewArgument(node.Pos(), "parent"); TExprNode::TListType initBodyArgs; @@ -4355,30 +4355,15 @@ namespace { return IGraphTransformer::TStatus::Error; } - const TTypeAnnotationNode* inputItemType = nullptr; - if (!EnsureNewSeqType<false>(input->Head(), ctx.Expr, &inputItemType)) { + if (!EnsureAnySeqType(input->Head(), ctx.Expr)) { return IGraphTransformer::TStatus::Error; } if (input->ChildrenSize() > 1U) { - if (!EnsureStaticContainerType(input->Head().Pos(), *inputItemType, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - for (auto i = 1U; i < input->ChildrenSize(); ++i) { - if (const auto status = NormalizeTupleOfAtoms(input, i, output, ctx.Expr); status != IGraphTransformer::TStatus::Ok) { + if (const auto status = NormalizeTupleOfAtoms<true, true>(input, i, output, ctx.Expr); status != IGraphTransformer::TStatus::Ok) { return status; } - - if (ETypeAnnotationKind::Struct == inputItemType->GetKind()) { - const auto structType = inputItemType->Cast<TStructExprType>(); - for (const auto& x : input->Child(i)->Children()) { - YQL_ENSURE(x->IsAtom()); - if (!FindOrReportMissingMember(x->Content(), x->Pos(), *structType, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - } - } } } @@ -5331,7 +5316,7 @@ namespace { } if (hasOriginalType) { - auto originalExtractorType = input->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType(); + auto originalExtractorType = input->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType(); if (!ApplyOriginalType(input, isMany, originalExtractorType, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -5351,7 +5336,7 @@ namespace { } if (hasOriginalType) { - auto originalExtractorType = input->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType(); + auto originalExtractorType = input->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType(); if (!ApplyOriginalType(input, isMany, originalExtractorType, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -6034,7 +6019,7 @@ namespace { auto status = NormalizeTupleOfAtoms(input, 1, output, ctx.Expr); if (isSession) { - status = status.Combine(NormalizeTupleOfAtoms(input, 5, output, ctx.Expr, /*deduplicate=*/false)); + status = status.Combine(NormalizeTupleOfAtoms<false>(input, 5, output, ctx.Expr)); } if (status != IGraphTransformer::TStatus::Ok) { @@ -6099,8 +6084,7 @@ namespace { calc = ctx.Expr.ChangeChildren(*calc, std::move(calcItems)); status = status.Combine(IGraphTransformer::TStatus::Repeat); } else { - status = status.Combine(NormalizeTupleOfAtoms(calc, TCoCalcOverWindowTuple::idx_SessionColumns, calc, - ctx.Expr, /*deduplicate=*/false)); + status = status.Combine(NormalizeTupleOfAtoms<false>(calc, TCoCalcOverWindowTuple::idx_SessionColumns, calc, ctx.Expr)); } if (status.Level == IGraphTransformer::TStatus::Error) { diff --git a/ydb/library/yql/core/yql_expr_constraint.cpp b/ydb/library/yql/core/yql_expr_constraint.cpp index 3a153c2cdb..c6f45d4c54 100644 --- a/ydb/library/yql/core/yql_expr_constraint.cpp +++ b/ydb/library/yql/core/yql_expr_constraint.cpp @@ -384,8 +384,15 @@ private: for (auto i = 1U; i < input->ChildrenSize(); ++i) { TConstraintNode::TSetType columns; columns.reserve(input->Child(i)->ChildrenSize()); - for (const auto& column: input->Child(i)->Children()) - columns.insert_unique(TConstraintNode::TPathType(1U, column->Content())); + for (const auto& column: input->Child(i)->Children()) { + if (column->IsAtom()) + columns.insert_unique(TConstraintNode::TPathType(1U, column->Content())); + else if (column->IsList()) { + TConstraintNode::TPathType path(column->ChildrenSize()); + std::transform(column->Children().cbegin(), column->Children().cend(), path.begin(), [](const TExprNode::TPtr& atom) { return atom->Content(); } ); + columns.insert_unique(std::move(path)); + } + } sets.insert_unique(std::move(columns)); } @@ -393,6 +400,12 @@ private: sets.insert_unique(TConstraintNode::TSetType{TConstraintNode::TPathType()}); auto constraint = ctx.MakeConstraint<TUniqueConstraintNodeBase<Distinct>>(std::move(sets)); + if (!constraint->IsApplicableToType(*input->GetTypeAnn())) { + ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << *constraint + << " is not applicable to " << *input->GetTypeAnn())); + return IGraphTransformer::TStatus::Error; + } + if (const auto old = input->Head().GetConstraint<TUniqueConstraintNodeBase<Distinct>>()) { if (old->Includes(*constraint)) { output = input->HeadPtr(); @@ -410,12 +423,25 @@ private: for (auto i = 1U; i < input->ChildrenSize(); ++i) { TConstraintNode::TSetType columns; columns.reserve(input->Child(i)->ChildrenSize()); - for (const auto& column: input->Child(i)->Children()) - columns.insert_unique(TConstraintNode::TPathType(1U, column->Content())); + for (const auto& column: input->Child(i)->Children()) { + if (column->IsAtom()) + columns.insert_unique(TConstraintNode::TPathType(1U, column->Content())); + else if (column->IsList()) { + TConstraintNode::TPathType path(column->ChildrenSize()); + std::transform(column->Children().cbegin(), column->Children().cend(), path.begin(), [](const TExprNode::TPtr& atom) { return atom->Content(); } ); + columns.insert_unique(std::move(path)); + } + } sets.insert_unique(std::move(columns)); } const auto constraint = ctx.MakeConstraint<TChoppedConstraintNode>(std::move(sets)); + if (!constraint->IsApplicableToType(*input->GetTypeAnn())) { + ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << *constraint + << " is not applicable to " << *input->GetTypeAnn())); + return IGraphTransformer::TStatus::Error; + } + if (const auto old = input->Head().GetConstraint<TChoppedConstraintNode>()) { if (old->Equals(*constraint)) { output = input->HeadPtr(); diff --git a/ydb/library/yql/core/yql_expr_type_annotation.cpp b/ydb/library/yql/core/yql_expr_type_annotation.cpp index d916abd98d..621483765f 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.cpp +++ b/ydb/library/yql/core/yql_expr_type_annotation.cpp @@ -3498,6 +3498,37 @@ template bool EnsureNewSeqType<true, true, true>(const TExprNode& node, TExprCon template bool EnsureNewSeqType<false, true, true>(const TExprNode& node, TExprContext& ctx, const TTypeAnnotationNode** itemType); template bool EnsureNewSeqType<false, true, false>(const TExprNode& node, TExprContext& ctx, const TTypeAnnotationNode** itemType); +bool EnsureAnySeqType(const TExprNode& node, TExprContext& ctx) { + if (HasError(node.GetTypeAnn(), ctx)) { + return false; + } + + if (!node.GetTypeAnn()) { + YQL_ENSURE(node.Type() == TExprNode::Lambda); + ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), "Expected flow, list, stream or dict, but got lambda.")); + return false; + } + + return EnsureAnySeqType(node.Pos(), *node.GetTypeAnn(), ctx); +} + +bool EnsureAnySeqType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx) { + if (HasError(&type, ctx)) { + return false; + } + + switch (type.GetKind()) { + case ETypeAnnotationKind::Flow: + case ETypeAnnotationKind::Stream: + case ETypeAnnotationKind::List: + case ETypeAnnotationKind::Dict: + return true; + default: break; + } + ctx.AddError(TIssue(ctx.GetPosition(position), TStringBuilder() << "Expected flow, list, stream or dict, but got: " << type)); + return false; +} + bool EnsureStructOrOptionalStructType(const TExprNode& node, TExprContext& ctx) { if (HasError(node.GetTypeAnn(), ctx)) { return false; @@ -5239,40 +5270,70 @@ bool IsSystemMember(const TStringBuf& memberName) { return memberName.StartsWith(TStringBuf("_yql_")); } -IGraphTransformer::TStatus NormalizeTupleOfAtoms(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx, - bool deduplicate) +template<bool Deduplicte, bool OrListsOfAtoms> +IGraphTransformer::TStatus NormalizeTupleOfAtoms(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx) { - if (!EnsureTupleOfAtoms(*input->Child(index), ctx)) { + auto children = input->Child(index)->ChildrenList(); + bool needRestart = false; + + if constexpr (OrListsOfAtoms) { + if (!EnsureTuple(*input->Child(index), ctx)) + return IGraphTransformer::TStatus::Error; + + for (auto i = 0U; i < children.size(); ++i) { + if (const auto item = input->Child(index)->Child(i); item->IsList()) { + if (1U == item->ChildrenSize() && item->Head().IsAtom()) { + needRestart = true; + children[i] = item->HeadPtr(); + } else if (!EnsureTupleOfAtoms(*item, ctx)) + return IGraphTransformer::TStatus::Error; + } else if (!EnsureAtom(*item, ctx)) + return IGraphTransformer::TStatus::Error; + } + } else if (!EnsureTupleOfAtoms(*input->Child(index), ctx)) return IGraphTransformer::TStatus::Error; - } - auto atomList = input->Child(index)->ChildrenList(); - bool needRestart = false; - auto getKey = [](const auto& node) { return node->Content(); }; - auto cmp = [&getKey](const auto& a, const auto& b) { return getKey(a) < getKey(b); }; - if (!IsSorted(atomList.begin(), atomList.end(), cmp)) { - if (deduplicate) { - SortUniqueBy(atomList, getKey); - } else { - Sort(atomList, cmp); + const auto getKey = [](const TExprNode::TPtr& node) { + if constexpr (OrListsOfAtoms) { + using TKeyType = TSmallVec<std::string_view>; + if (node->IsAtom()) + return TKeyType(1U, node->Content()); + + TKeyType result(node->ChildrenSize()); + std::transform(node->Children().cbegin(), node->Children().cend(), result.begin(), [](const TExprNode::TPtr& atom) { return atom->Content(); }); + return result; + } else + return node->Content(); + }; + const auto cmp = [&getKey](const TExprNode::TPtr& a, const TExprNode::TPtr& b) { return getKey(a) < getKey(b); }; + if (std::is_sorted(children.cbegin(), children.cend(), cmp)) { + if constexpr (Deduplicte) { + if (const auto dups = UniqueBy(children.begin(), children.end(), getKey); children.cend() != dups) { + needRestart = true; + children.erase(dups, children.cend()); + } } + } else { needRestart = true; - } else if (deduplicate) { - auto dups = UniqueBy(atomList.begin(), atomList.end(), getKey); - if (dups != atomList.end()) { - needRestart = true; - atomList.erase(dups, atomList.end()); + if constexpr (Deduplicte) { + SortUniqueBy(children, getKey); + } else { + SortBy(children, getKey); } } if (needRestart) { - output = ctx.ChangeChild(*input, index, ctx.NewList(input->Child(index)->Pos(), std::move(atomList))); + output = ctx.ChangeChild(*input, index, ctx.NewList(input->Child(index)->Pos(), std::move(children))); return IGraphTransformer::TStatus::Repeat; } return IGraphTransformer::TStatus::Ok; } +template IGraphTransformer::TStatus NormalizeTupleOfAtoms<true, true>(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx); +template IGraphTransformer::TStatus NormalizeTupleOfAtoms<true, false>(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx); +template IGraphTransformer::TStatus NormalizeTupleOfAtoms<false, false>(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx); + IGraphTransformer::TStatus NormalizeKeyValueTuples(const TExprNode::TPtr& input, ui32 startIndex, TExprNode::TPtr& output, TExprContext &ctx, bool deduplicate) { @@ -5474,7 +5535,7 @@ const TTypeAnnotationNode* AggApplySerializedStateType(const TExprNode::TPtr& in if (name == "sum") { return input->GetTypeAnn(); } - + const auto decimalType = lambdaType->Cast<TDataExprParamsType>(); stateValueType = ctx.MakeType<TDataExprParamsType>(EDataSlot::Decimal, "35", decimalType->GetParamTwo()); } else if (IsDataTypeInterval(lambdaTypeSlot)) { diff --git a/ydb/library/yql/core/yql_expr_type_annotation.h b/ydb/library/yql/core/yql_expr_type_annotation.h index 05d4d4152b..a7f4504522 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.h +++ b/ydb/library/yql/core/yql_expr_type_annotation.h @@ -169,6 +169,8 @@ template <bool WithOptional, bool WithList = true, bool WithStream = true> bool EnsureNewSeqType(const TExprNode& node, TExprContext& ctx, const TTypeAnnotationNode** itemType = nullptr); template <bool WithOptional, bool WithList = true, bool WithStream = true> bool EnsureNewSeqType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx, const TTypeAnnotationNode** itemType = nullptr); +bool EnsureAnySeqType(const TExprNode& node, TExprContext& ctx); +bool EnsureAnySeqType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx); bool EnsureDependsOn(const TExprNode& node, TExprContext& ctx); bool EnsureDependsOnTail(const TExprNode& node, TExprContext& ctx, unsigned requiredArgumentCount, unsigned requiredDependsOnCount = 0); @@ -291,8 +293,9 @@ TExprNode::TPtr ExpandType(TPositionHandle position, const TTypeAnnotationNode& bool IsSystemMember(const TStringBuf& memberName); -IGraphTransformer::TStatus NormalizeTupleOfAtoms(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx, - bool deduplicte = true); +template<bool Deduplicte = true, bool OrListsOfAtoms = false> +IGraphTransformer::TStatus NormalizeTupleOfAtoms(const TExprNode::TPtr& input, ui32 index, TExprNode::TPtr& output, TExprContext& ctx); + IGraphTransformer::TStatus NormalizeKeyValueTuples(const TExprNode::TPtr& input, ui32 startIndex, TExprNode::TPtr& output, TExprContext& ctx, bool deduplicate = false); diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index 8f0241665d..c2e503019c 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -27,11 +27,16 @@ TExprNode::TPtr KeepConstraint(TExprNode::TPtr node, const TExprNode& src, TExpr for (const auto& set : constraint->GetAllSets()) { TExprNode::TListType columns; columns.reserve(set.size()); - for (const auto& path : set) + for (const auto& path : set) { if (1U == path.size()) columns.emplace_back(ctx.NewAtom(pos, path.front())); - if (!columns.empty()) - children.emplace_back(ctx.NewList(pos, std::move(columns))); + else { + TExprNode::TListType atoms(path.size()); + std::transform(path.cbegin(), path.cend(), atoms.begin(), [&](const std::string_view& name) { return ctx.NewAtom(pos, name); }); + columns.emplace_back(ctx.NewList(pos, std::move(atoms))); + } + } + children.emplace_back(ctx.NewList(pos, std::move(columns))); } return ctx.NewCallable(pos, TString("Assume") += TConstraint::Name(), std::move(children)); } @@ -1149,7 +1154,7 @@ TExprNode::TPtr BuildKeySelector(TPositionHandle pos, const TStructExprType& row TExprNode::TPtr tuple; if (tupleItems.size() == 0) { - tuple = ctx.Builder(pos).Callable("Uint32").Atom(0, "0").Seal().Build(); + tuple = ctx.Builder(pos).Callable("Uint32").Atom(0, 0U).Seal().Build(); } else if (tupleItems.size() == 1) { tuple = tupleItems[0]; } else { |