diff options
author | ssmike <ssmike@ydb.tech> | 2023-07-18 17:40:07 +0300 |
---|---|---|
committer | ssmike <ssmike@ydb.tech> | 2023-07-18 17:40:07 +0300 |
commit | bb605c86b04dd75721dddf111e53f5325a82a85b (patch) | |
tree | ecdf9257860c6e70ce5f26f0036e88a377d3f6a2 | |
parent | 5b8d459a92fed198035b8f70705bdb7540e1d69b (diff) | |
download | ydb-bb605c86b04dd75721dddf111e53f5325a82a85b.tar.gz |
Support unlimited ranges
7 files changed, 91 insertions, 35 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 61a82c6d7d8..7fcae969e64 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -5986,7 +5986,16 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { map["RangeMultiply"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { if (node->ChildrenSize() == 2 && node->Tail().IsCallable("RangeMultiply")) { - auto minLimit = ctx.NewCallable(node->Pos(), "Min", { node->HeadPtr(), node->Tail().HeadPtr() }); + auto first = node->HeadPtr(); + auto second = node->Tail().HeadPtr(); + TExprNode::TPtr minLimit; + if (first->IsCallable("Void")) { + minLimit = second; + } else if (second->IsCallable("Void")) { + minLimit = first; + } else { + minLimit = ctx.NewCallable(node->Pos(), "Min", { first , second }); + } YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Tail().Content(); return ctx.ChangeChild(node->Tail(), 0, std::move(minLimit)); } diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate.h b/ydb/library/yql/core/extract_predicate/extract_predicate.h index 83fc16110d3..475e2986808 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate.h +++ b/ydb/library/yql/core/extract_predicate/extract_predicate.h @@ -6,7 +6,7 @@ namespace NYql { struct TPredicateExtractorSettings { - size_t MaxRanges = 10000; + TMaybe<size_t> MaxRanges = 10000; // should be less than Max<size_t>() due to integer overflow bool MergeAdjacentPointRanges = true; bool HaveNextValueCallable = false; bool BuildLiteralRange = false; diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp index 40ef3047238..6b634f297f7 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp @@ -15,6 +15,21 @@ namespace { using namespace NNodes; using NUdf::TCastResultOptions; +TExprNode::TPtr BuildMultiplyLimit(TMaybe<size_t> limit, TExprContext& ctx, TPositionHandle pos) { + if (limit) { + return ctx.Builder(pos) + .Callable("Uint64") + .Atom(0, ToString(*limit), TNodeFlags::Default) + .Seal() + .Build(); + } else { + return ctx.Builder(pos) + .Callable("Void") + .Seal() + .Build(); + } +} + const TTypeAnnotationNode* GetBaseDataType(const TTypeAnnotationNode *type) { type = RemoveAllOptionals(type); return (type && type->GetKind() == ETypeAnnotationKind::Data) ? type : nullptr; @@ -903,15 +918,12 @@ TExprNode::TPtr BuildSingleComputeRange(const TStructExprType& rowType, if (!hasNot) { // IN = collection of point intervals body = ctx.Builder(pos) - .Callable("Take") - .Callable(0, "FlatMap") + .Callable("FlatMap") .Add(0, collection) .Lambda(1) .Param("item") .Callable("RangeMultiply") - .Callable(0, "Uint64") - .Atom(0, ToString(settings.MaxRanges), TNodeFlags::Default) - .Seal() + .Add(0, BuildMultiplyLimit(settings.MaxRanges, ctx, pos)) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { if (haveTuples) { const auto& types = compositeKeyType->Cast<TTupleExprType>()->GetItems(); @@ -943,29 +955,44 @@ TExprNode::TPtr BuildSingleComputeRange(const TStructExprType& rowType, .Seal() .Seal() .Seal() - .Callable(1, "Uint64") - // +1 is essential here - RangeMultiply will detect overflow by this extra item - .Atom(0, ToString(settings.MaxRanges + 1), TNodeFlags::Default) - .Seal() - .Seal() .Build(); + if (settings.MaxRanges) { + YQL_ENSURE(*settings.MaxRanges < Max<size_t>()); + body = ctx.Builder(pos) + .Callable("Take") + .Add(0, body) + .Callable(1, "Uint64") + // +1 is essential here - RangeMultiply will detect overflow by this extra item + .Atom(0, ToString(*settings.MaxRanges + 1), TNodeFlags::Default) + .Seal() + .Seal() + .Build(); + } body = ctx.NewCallable(pos, "Collect", { body }); - body = ctx.Builder(pos) - .Callable("IfStrict") - .Callable(0, ">") - .Callable(0, "Length") - .Add(0, body) + if (settings.MaxRanges) { + body = ctx.Builder(pos) + .Callable("IfStrict") + .Callable(0, ">") + .Callable(0, "Length") + .Add(0, body) + .Seal() + .Callable(1, "Uint64") + .Atom(0, ToString(*settings.MaxRanges), TNodeFlags::Default) + .Seal() .Seal() - .Callable(1, "Uint64") - .Atom(0, ToString(settings.MaxRanges), TNodeFlags::Default) + .Add(1, BuildFullRange(pos, rowType, keys, ctx)) + .Callable(2, "RangeUnion") + .Add(0, body) .Seal() .Seal() - .Add(1, BuildFullRange(pos, rowType, keys, ctx)) - .Callable(2, "RangeUnion") + .Build(); + } else { + body = ctx.Builder(pos) + .Callable("RangeUnion") .Add(0, body) .Seal() - .Seal() - .Build(); + .Build(); + } } else { YQL_ENSURE(false, "not supported yet, should be rejected earlier"); } @@ -1335,10 +1362,10 @@ bool IsRestTrue(const TExprNode& node) { return false; } -TExprNode::TPtr BuildRangeMultiply(TPositionHandle pos, size_t maxRanges, const TExprNodeList& toMultiply, TExprContext& ctx) { +TExprNode::TPtr BuildRangeMultiply(TPositionHandle pos, TMaybe<size_t> maxRanges, const TExprNodeList& toMultiply, TExprContext& ctx) { TExprNodeList args; args.reserve(toMultiply.size() + 1); - args.push_back(ctx.NewCallable(pos, "Uint64", { ctx.NewAtom(pos, ToString(maxRanges), TNodeFlags::Default) })); + args.push_back(BuildMultiplyLimit(maxRanges, ctx, pos)); args.insert(args.end(), toMultiply.begin(), toMultiply.end()); return ctx.NewCallable(pos, "RangeMultiply", std::move(args)); } @@ -1997,7 +2024,7 @@ TPredicateRangeExtractor::TBuildResult TPredicateRangeExtractor::BuildComputeNod if (result.ComputeNode) { result.ExpectedMaxRanges = CalcMaxRanges(rebuiltRange, indexKeysOrder); - if (result.ExpectedMaxRanges && *result.ExpectedMaxRanges < Settings.MaxRanges) { + if (result.ExpectedMaxRanges && (!Settings.MaxRanges || *result.ExpectedMaxRanges < *Settings.MaxRanges)) { TCoLambda lambda(result.PrunedLambda); auto newPred = MakePredicateFromPrunedRange(prunedRange, lambda.Args().Arg(0).Ptr(), ctx); diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index ed7de115d58..d6865af8fce 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -11143,7 +11143,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - if (!EnsureSpecificDataType(input->Head(), EDataSlot::Uint64, ctx.Expr)) { + if (!IsVoidType(input->Head(), ctx.Expr) && !EnsureSpecificDataType(input->Head(), EDataSlot::Uint64, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } diff --git a/ydb/library/yql/core/yql_expr_type_annotation.cpp b/ydb/library/yql/core/yql_expr_type_annotation.cpp index 4ca8c7ab1d1..a13ab51e7f8 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.cpp +++ b/ydb/library/yql/core/yql_expr_type_annotation.cpp @@ -2957,6 +2957,13 @@ bool EnsureDictType(TPositionHandle position, const TTypeAnnotationNode& type, T return true; } +bool IsVoidType(const TExprNode& node, TExprContext& ctx) { + if (HasError(node.GetTypeAnn(), ctx) || !node.GetTypeAnn()) { + return false; + } + return node.GetTypeAnn()->GetKind() == ETypeAnnotationKind::Void; +} + bool EnsureVoidType(const TExprNode& node, TExprContext& ctx) { if (HasError(node.GetTypeAnn(), ctx) || !node.GetTypeAnn()) { YQL_ENSURE(node.Type() == TExprNode::Lambda); @@ -2964,7 +2971,7 @@ bool EnsureVoidType(const TExprNode& node, TExprContext& ctx) { return false; } - if (node.GetTypeAnn()->GetKind() != ETypeAnnotationKind::Void) { + if (!IsVoidType(node, ctx)) { ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() << "Expected void type, but got: " << *node.GetTypeAnn())); return false; } @@ -5487,7 +5494,7 @@ bool HasContextFuncs(const TExprNode& input) { return false; } - if (node.IsCallable({"AggApply","AggApplyState","AggApplyManyState","AggBlockApply","AggBlockApplyState"}) && + if (node.IsCallable({"AggApply","AggApplyState","AggApplyManyState","AggBlockApply","AggBlockApplyState"}) && node.Head().Content().StartsWith("pg_")) { needCtx = true; return false; diff --git a/ydb/library/yql/core/yql_expr_type_annotation.h b/ydb/library/yql/core/yql_expr_type_annotation.h index d3bf8512407..60d4eea91b0 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.h +++ b/ydb/library/yql/core/yql_expr_type_annotation.h @@ -135,6 +135,8 @@ bool EnsureDryType(TPositionHandle position, const TTypeAnnotationNode& type, TE bool EnsureDryType(const TExprNode& node, TExprContext& ctx); bool EnsureDictType(const TExprNode& node, TExprContext& ctx); bool EnsureDictType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx); + +bool IsVoidType(const TExprNode& node, TExprContext& ctx); bool EnsureVoidType(const TExprNode& node, TExprContext& ctx); bool EnsureVoidLiteral(const TExprNode& node, TExprContext& ctx); bool EnsureCallableType(const TExprNode& node, TExprContext& ctx); @@ -321,7 +323,7 @@ bool GetMinMaxResultType(const TPositionHandle& pos, const TTypeAnnotationNode& IGraphTransformer::TStatus ExtractPgTypesFromMultiLambda(TExprNode::TPtr& lambda, TVector<ui32>& argTypes, bool& needRetype, TExprContext& ctx); -TExprNode::TPtr ExpandPgAggregationTraits(TPositionHandle pos, const NPg::TAggregateDesc& aggDesc, bool onWindow, +TExprNode::TPtr ExpandPgAggregationTraits(TPositionHandle pos, const NPg::TAggregateDesc& aggDesc, bool onWindow, const TExprNode::TPtr& lambda, const TVector<ui32>& argTypes, const TTypeAnnotationNode* itemType, TExprContext& ctx); const TTypeAnnotationNode* GetOriginalResultType(TPositionHandle pos, bool isMany, const TTypeAnnotationNode* originalExtractorType, TExprContext& ctx); diff --git a/ydb/library/yql/minikql/mkql_program_builder.cpp b/ydb/library/yql/minikql/mkql_program_builder.cpp index e85212138cd..add4a2b6d13 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.cpp +++ b/ydb/library/yql/minikql/mkql_program_builder.cpp @@ -5266,9 +5266,14 @@ TRuntimeNode TProgramBuilder::RangeIntersect(const TArrayRef<const TRuntimeNode> TRuntimeNode TProgramBuilder::RangeMultiply(const TArrayRef<const TRuntimeNode>& args) { MKQL_ENSURE(args.size() >= 2, "Expecting at least two arguments"); - MKQL_ENSURE(args.front().GetStaticType()->IsData() && - static_cast<TDataType*>(args.front().GetStaticType())->GetSchemeType() == NUdf::TDataType<ui64>::Id, - "Expected ui64 as first argument"); + bool unlimited = false; + if (args.front().GetStaticType()->IsVoid()) { + unlimited = true; + } else { + MKQL_ENSURE(args.front().GetStaticType()->IsData() && + static_cast<TDataType*>(args.front().GetStaticType())->GetSchemeType() == NUdf::TDataType<ui64>::Id, + "Expected ui64 as first argument"); + } std::vector<TType*> outputComponents; for (size_t i = 1; i < args.size(); ++i) { @@ -5299,8 +5304,14 @@ TRuntimeNode TProgramBuilder::RangeMultiply(const TArrayRef<const TRuntimeNode>& auto outputRange = TTupleType::Create(outputRangeComps.size(), &outputRangeComps.front(), Env); TCallableBuilder callableBuilder(Env, __func__, TListType::Create(outputRange, Env)); - for (auto& arg : args) { - callableBuilder.Add(arg); + if (unlimited) { + callableBuilder.Add(NewDataLiteral<ui64>(std::numeric_limits<ui64>::max())); + } else { + callableBuilder.Add(args[0]); + } + + for (size_t i = 1; i < args.size(); ++i) { + callableBuilder.Add(args[i]); } return TRuntimeNode(callableBuilder.Build(), false); |