diff options
author | a-romanov <Anton.Romanov@ydb.tech> | 2023-04-20 20:02:52 +0300 |
---|---|---|
committer | a-romanov <Anton.Romanov@ydb.tech> | 2023-04-20 20:02:52 +0300 |
commit | bf151d78b4f661464cebd573e17ec0771e450d24 (patch) | |
tree | 85cb57b3d6943b7e6b3c3a69af01e38e9b8606ed | |
parent | f576b57e89f1fc8d34332aa19629231e715743c8 (diff) | |
download | ydb-bf151d78b4f661464cebd573e17ec0771e450d24.tar.gz |
YQL-15415 YQL-15435 Suppres order on join inputs.
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 176 |
1 files changed, 129 insertions, 47 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index e6d73f249db..b8ddf4ba68e 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -1110,7 +1110,12 @@ TExprNode::TPtr OptimizeToOptional(const TExprNode::TPtr& node, TExprContext& ct return ctx.NewCallable(node->Head().Pos(), "Nothing", {ExpandType(node->Pos(), *node->GetTypeAnn(), ctx)}); } - if constexpr (!OrderAware) { + if constexpr (OrderAware) { + if (node->Head().IsCallable("Unordered")) { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.RenameNode(*node, "ToOptional"); + } + } else { if (const auto sorted = node->Head().GetConstraint<TSortedConstraintNode>()) { YQL_CLOG(DEBUG, Core) << node->Content() << " over " << *sorted << ' ' << node->Head().Content(); return ctx.ChangeChild(*node, 0, ctx.NewCallable(node->Head().Pos(), "Unordered", {node->HeadPtr()})); @@ -2004,7 +2009,12 @@ TExprNode::TPtr BuildEquiJoinForSqlInChain(const TExprNode::TPtr& flatMapNode, c template <bool Ordered> TExprNode::TPtr SimpleFlatMap(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { - if constexpr (!Ordered) { + if constexpr (Ordered) { + if (node->Head().IsCallable("Unordered")) { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.RenameNode(*node, "FlatMap"); + } + } else { if (const auto sorted = node->Head().GetConstraint<TSortedConstraintNode>()) { YQL_CLOG(DEBUG, Core) << node->Content() << " over " << *sorted << ' ' << node->Head().Content(); return ctx.ChangeChild(*node, 0, ctx.NewCallable(node->Head().Pos(), "Unordered", {node->HeadPtr()})); @@ -3027,51 +3037,48 @@ TExprNode::TPtr PullAssumeColumnOrderOverEquiJoin(const TExprNode::TPtr& node, T return node; } -} // namespace - -TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap, - TShouldConvertSqlInToJoinPredicate shouldConvertSqlInToJoin, TExprContext& ctx, bool prefixOnly) -{ - // FlatMap input should be List<Struct<...>> to be accepted as EquiJoin input - auto inputType = flatMap.Input().Ref().GetTypeAnn(); - if (inputType->GetKind() != ETypeAnnotationKind::List || - inputType->Cast<TListExprType>()->GetItemType()->GetKind() != ETypeAnnotationKind::Struct) - { - return {}; +std::unordered_set<ui32> GetUselessSortedJoinInputs(const TCoEquiJoin& equiJoin) { + std::unordered_map<std::string_view, std::tuple<ui32, const TSortedConstraintNode*, const TChoppedConstraintNode*>> sorteds(equiJoin.ArgCount() - 2U); + for (ui32 i = 0U; i + 2U < equiJoin.ArgCount(); ++i) { + if (const auto joinInput = equiJoin.Arg(i).Cast<TCoEquiJoinInput>(); joinInput.Scope().Ref().IsAtom()) { + const auto sorted = joinInput.List().Ref().GetConstraint<TSortedConstraintNode>(); + const auto chopped = joinInput.List().Ref().GetConstraint<TChoppedConstraintNode>(); + if (sorted || chopped) + sorteds.emplace(joinInput.Scope().Ref().Content(), std::make_tuple(i, sorted, chopped)); + } } - TCoLambda lambda = flatMap.Lambda(); - if (!lambda.Body().Maybe<TCoConditionalValueBase>()) { - return {}; - } + for (std::vector<const TExprNode*> joinTreeNodes(1U, equiJoin.Arg(equiJoin.ArgCount() - 2).Raw()); !joinTreeNodes.empty();) { + const auto joinTree = joinTreeNodes.back(); + joinTreeNodes.pop_back(); - TCoConditionalValueBase conditional(lambda.Body().Ptr()); - TPredicateChain chain; - auto lambdaArg = lambda.Ptr()->Head().HeadPtr(); - auto sqlInTail = SplitPredicateChain(conditional.Predicate().Ptr(), lambdaArg, shouldConvertSqlInToJoin, chain, ctx); + if (!joinTree->Child(1)->IsAtom()) + joinTreeNodes.emplace_back(joinTree->Child(1)); - if (!chain.empty()) { - if (chain.front().ConvertibleToJoin) { - return ConvertSqlInPredicatesPrefixToJoins(flatMap.Ptr(), chain, sqlInTail, ctx); - } + if (!joinTree->Child(2)->IsAtom()) + joinTreeNodes.emplace_back(joinTree->Child(2)); - if (sqlInTail && !prefixOnly) { - YQL_CLOG(DEBUG, Core) << "FlatMapOverNonJoinableSqlInChain of size " << chain.size(); - TExprNode::TListType predicates; - predicates.reserve(chain.size()); - for (auto& it : chain) { - predicates.emplace_back(std::move(it.Pred)); - } - auto prefixPred = ctx.NewCallable(flatMap.Pos(), "And", std::move(predicates)); + if (!joinTree->Head().IsAtom("Cross")) { + std::unordered_map<std::string_view, TConstraintNode::TSetType> tableJoinKeys; + for (const auto keys : {joinTree->Child(3), joinTree->Child(4)}) + for (ui32 i = 0U; i < keys->ChildrenSize(); ++i) + tableJoinKeys[keys->Child(i)->Content()].insert_unique(TConstraintNode::TPathType(1U, keys->Child(++i)->Content())); - auto innerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), flatMap.Input().Ptr(), prefixPred, false, ctx); - auto outerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), innerFlatMap, sqlInTail, true, ctx); - return ctx.RenameNode(*outerFlatMap, - outerFlatMap->Content() == "OrderedFlatMap" ? "OrderedFlatMapToEquiJoin" : "FlatMapToEquiJoin"); + for (const auto& [label, joinKeys]: tableJoinKeys) { + if (const auto it = sorteds.find(label); sorteds.cend() != it) { + const auto sorted = std::get<const TSortedConstraintNode*>(it->second); + const auto chopped = std::get<const TChoppedConstraintNode*>(it->second); + if (sorted && sorted->StartsWith(joinKeys) || chopped && chopped->Equals(joinKeys)) + sorteds.erase(it); + } + } } } - return {}; + std::unordered_set<ui32> result(sorteds.size()); + for (const auto& sort : sorteds) + result.emplace(std::get<ui32>(sort.second)); + return result; } TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const THashSet<TStringBuf>& serializeUdfNames) { @@ -3215,10 +3222,6 @@ TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ct .Done().Ptr(); } -TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { - return BuildJsonParse(jsonExpr.Json().Ptr(), ctx); -} - TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) { const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn(); if (type->GetKind() == ETypeAnnotationKind::Optional) { @@ -3299,7 +3302,12 @@ TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprCont template<bool Ordered> TExprNode::TPtr CanonizeMultiMap(const TExprNode::TPtr& node, TExprContext& ctx) { - if constexpr (!Ordered) { + if constexpr (Ordered) { + if (node->Head().IsCallable("Unordered")) { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.RenameNode(*node, "MultiMap"); + } + } else { if (const auto sorted = node->Head().GetConstraint<TSortedConstraintNode>()) { YQL_CLOG(DEBUG, Core) << node->Content() << " over " << *sorted << ' ' << node->Head().Content(); return ctx.ChangeChild(*node, 0, ctx.NewCallable(node->Head().Pos(), "Unordered", {node->HeadPtr()})); @@ -3426,6 +3434,8 @@ TExprNode::TPtr OptimizeExtend(const TExprNode::TPtr& node, TExprContext& ctx, T return node; } +} // namespace + void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { using namespace std::placeholders; @@ -3671,6 +3681,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { }; map["OrderedFilter"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + if (node->Head().IsCallable("Unordered")) { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.RenameNode(*node, "Filter"); + } + YQL_CLOG(DEBUG, Core) << "Canonize " << node->Content(); return ConvertFilterToFlatmap<TCoOrderedFilter, TCoOrderedFlatMap>(TCoOrderedFilter(node), ctx, optCtx); }; @@ -3686,6 +3701,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { }; map["OrderedMap"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + if (node->Head().IsCallable("Unordered")) { + YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content(); + return ctx.RenameNode(*node, "Map"); + } + YQL_CLOG(DEBUG, Core) << "Canonize " << node->Content(); return ConvertMapToFlatmap<TCoOrderedMap, TCoOrderedFlatMap>(TCoOrderedMap(node), ctx); }; @@ -4682,8 +4702,21 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ret; } - ui32 inputsCount = node->ChildrenSize() - 2; - for (ui32 i = 0; i < inputsCount; ++i) { + if (const auto indexes = GetUselessSortedJoinInputs(TCoEquiJoin(node)); !indexes.empty()) { + YQL_CLOG(DEBUG, Core) << "Suppress order on " << indexes.size() << ' ' << node->Content() << " inputs."; + auto children = node->ChildrenList(); + for (const auto idx : indexes) + children[idx] = ctx.Builder(children[idx]->Pos()) + .List() + .Callable(0, "Unordered") + .Add(0, children[idx]->HeadPtr()) + .Seal() + .Add(1, children[idx]->TailPtr()) + .Seal().Build(); + return ctx.ChangeChildren(*node, std::move(children)); + } + + for (ui32 i = 0U; i < node->ChildrenSize() - 2U; ++i) { if (IsListReorder(node->Child(i)->Head())) { YQL_CLOG(DEBUG, Core) << node->Content() << " with " << node->Child(i)->Content(); return ctx.ChangeChild(*node, i, ctx.ChangeChild(*node->Child(i), 0, node->Child(i)->Head().HeadPtr())); @@ -4728,12 +4761,12 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { map["AggrCountInit"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { if (node->Head().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Optional || node->Head().IsCallable("Just")) { YQL_CLOG(DEBUG, Core) << node->Content() << " - 1"; - return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), "1", TNodeFlags::Default) }); + return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), 1U) }); } if (node->Head().IsCallable("Nothing")) { YQL_CLOG(DEBUG, Core) << node->Content() << " - 0"; - return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), "0", TNodeFlags::Default) }); + return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), 0U) }); } return node; @@ -6708,4 +6741,53 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { }; } +TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap, + TShouldConvertSqlInToJoinPredicate shouldConvertSqlInToJoin, TExprContext& ctx, bool prefixOnly) +{ + // FlatMap input should be List<Struct<...>> to be accepted as EquiJoin input + auto inputType = flatMap.Input().Ref().GetTypeAnn(); + if (inputType->GetKind() != ETypeAnnotationKind::List || + inputType->Cast<TListExprType>()->GetItemType()->GetKind() != ETypeAnnotationKind::Struct) + { + return {}; + } + + TCoLambda lambda = flatMap.Lambda(); + if (!lambda.Body().Maybe<TCoConditionalValueBase>()) { + return {}; + } + + TCoConditionalValueBase conditional(lambda.Body().Ptr()); + TPredicateChain chain; + auto lambdaArg = lambda.Ptr()->Head().HeadPtr(); + auto sqlInTail = SplitPredicateChain(conditional.Predicate().Ptr(), lambdaArg, shouldConvertSqlInToJoin, chain, ctx); + + if (!chain.empty()) { + if (chain.front().ConvertibleToJoin) { + return ConvertSqlInPredicatesPrefixToJoins(flatMap.Ptr(), chain, sqlInTail, ctx); + } + + if (sqlInTail && !prefixOnly) { + YQL_CLOG(DEBUG, Core) << "FlatMapOverNonJoinableSqlInChain of size " << chain.size(); + TExprNode::TListType predicates; + predicates.reserve(chain.size()); + for (auto& it : chain) { + predicates.emplace_back(std::move(it.Pred)); + } + auto prefixPred = ctx.NewCallable(flatMap.Pos(), "And", std::move(predicates)); + + auto innerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), flatMap.Input().Ptr(), prefixPred, false, ctx); + auto outerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), innerFlatMap, sqlInTail, true, ctx); + return ctx.RenameNode(*outerFlatMap, + outerFlatMap->Content() == "OrderedFlatMap" ? "OrderedFlatMapToEquiJoin" : "FlatMapToEquiJoin"); + } + } + + return {}; +} + +TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) { + return BuildJsonParse(jsonExpr.Json().Ptr(), ctx); +} + } // namespace NYql |