aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authora-romanov <Anton.Romanov@ydb.tech>2023-04-20 20:02:52 +0300
committera-romanov <Anton.Romanov@ydb.tech>2023-04-20 20:02:52 +0300
commitbf151d78b4f661464cebd573e17ec0771e450d24 (patch)
tree85cb57b3d6943b7e6b3c3a69af01e38e9b8606ed
parentf576b57e89f1fc8d34332aa19629231e715743c8 (diff)
downloadydb-bf151d78b4f661464cebd573e17ec0771e450d24.tar.gz
YQL-15415 YQL-15435 Suppres order on join inputs.
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_simple1.cpp176
1 files changed, 129 insertions, 47 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
index e6d73f249db..b8ddf4ba68e 100644
--- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
@@ -1110,7 +1110,12 @@ TExprNode::TPtr OptimizeToOptional(const TExprNode::TPtr& node, TExprContext& ct
return ctx.NewCallable(node->Head().Pos(), "Nothing", {ExpandType(node->Pos(), *node->GetTypeAnn(), ctx)});
}
- if constexpr (!OrderAware) {
+ if constexpr (OrderAware) {
+ if (node->Head().IsCallable("Unordered")) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return ctx.RenameNode(*node, "ToOptional");
+ }
+ } else {
if (const auto sorted = node->Head().GetConstraint<TSortedConstraintNode>()) {
YQL_CLOG(DEBUG, Core) << node->Content() << " over " << *sorted << ' ' << node->Head().Content();
return ctx.ChangeChild(*node, 0, ctx.NewCallable(node->Head().Pos(), "Unordered", {node->HeadPtr()}));
@@ -2004,7 +2009,12 @@ TExprNode::TPtr BuildEquiJoinForSqlInChain(const TExprNode::TPtr& flatMapNode, c
template <bool Ordered>
TExprNode::TPtr SimpleFlatMap(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
- if constexpr (!Ordered) {
+ if constexpr (Ordered) {
+ if (node->Head().IsCallable("Unordered")) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return ctx.RenameNode(*node, "FlatMap");
+ }
+ } else {
if (const auto sorted = node->Head().GetConstraint<TSortedConstraintNode>()) {
YQL_CLOG(DEBUG, Core) << node->Content() << " over " << *sorted << ' ' << node->Head().Content();
return ctx.ChangeChild(*node, 0, ctx.NewCallable(node->Head().Pos(), "Unordered", {node->HeadPtr()}));
@@ -3027,51 +3037,48 @@ TExprNode::TPtr PullAssumeColumnOrderOverEquiJoin(const TExprNode::TPtr& node, T
return node;
}
-} // namespace
-
-TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap,
- TShouldConvertSqlInToJoinPredicate shouldConvertSqlInToJoin, TExprContext& ctx, bool prefixOnly)
-{
- // FlatMap input should be List<Struct<...>> to be accepted as EquiJoin input
- auto inputType = flatMap.Input().Ref().GetTypeAnn();
- if (inputType->GetKind() != ETypeAnnotationKind::List ||
- inputType->Cast<TListExprType>()->GetItemType()->GetKind() != ETypeAnnotationKind::Struct)
- {
- return {};
+std::unordered_set<ui32> GetUselessSortedJoinInputs(const TCoEquiJoin& equiJoin) {
+ std::unordered_map<std::string_view, std::tuple<ui32, const TSortedConstraintNode*, const TChoppedConstraintNode*>> sorteds(equiJoin.ArgCount() - 2U);
+ for (ui32 i = 0U; i + 2U < equiJoin.ArgCount(); ++i) {
+ if (const auto joinInput = equiJoin.Arg(i).Cast<TCoEquiJoinInput>(); joinInput.Scope().Ref().IsAtom()) {
+ const auto sorted = joinInput.List().Ref().GetConstraint<TSortedConstraintNode>();
+ const auto chopped = joinInput.List().Ref().GetConstraint<TChoppedConstraintNode>();
+ if (sorted || chopped)
+ sorteds.emplace(joinInput.Scope().Ref().Content(), std::make_tuple(i, sorted, chopped));
+ }
}
- TCoLambda lambda = flatMap.Lambda();
- if (!lambda.Body().Maybe<TCoConditionalValueBase>()) {
- return {};
- }
+ for (std::vector<const TExprNode*> joinTreeNodes(1U, equiJoin.Arg(equiJoin.ArgCount() - 2).Raw()); !joinTreeNodes.empty();) {
+ const auto joinTree = joinTreeNodes.back();
+ joinTreeNodes.pop_back();
- TCoConditionalValueBase conditional(lambda.Body().Ptr());
- TPredicateChain chain;
- auto lambdaArg = lambda.Ptr()->Head().HeadPtr();
- auto sqlInTail = SplitPredicateChain(conditional.Predicate().Ptr(), lambdaArg, shouldConvertSqlInToJoin, chain, ctx);
+ if (!joinTree->Child(1)->IsAtom())
+ joinTreeNodes.emplace_back(joinTree->Child(1));
- if (!chain.empty()) {
- if (chain.front().ConvertibleToJoin) {
- return ConvertSqlInPredicatesPrefixToJoins(flatMap.Ptr(), chain, sqlInTail, ctx);
- }
+ if (!joinTree->Child(2)->IsAtom())
+ joinTreeNodes.emplace_back(joinTree->Child(2));
- if (sqlInTail && !prefixOnly) {
- YQL_CLOG(DEBUG, Core) << "FlatMapOverNonJoinableSqlInChain of size " << chain.size();
- TExprNode::TListType predicates;
- predicates.reserve(chain.size());
- for (auto& it : chain) {
- predicates.emplace_back(std::move(it.Pred));
- }
- auto prefixPred = ctx.NewCallable(flatMap.Pos(), "And", std::move(predicates));
+ if (!joinTree->Head().IsAtom("Cross")) {
+ std::unordered_map<std::string_view, TConstraintNode::TSetType> tableJoinKeys;
+ for (const auto keys : {joinTree->Child(3), joinTree->Child(4)})
+ for (ui32 i = 0U; i < keys->ChildrenSize(); ++i)
+ tableJoinKeys[keys->Child(i)->Content()].insert_unique(TConstraintNode::TPathType(1U, keys->Child(++i)->Content()));
- auto innerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), flatMap.Input().Ptr(), prefixPred, false, ctx);
- auto outerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), innerFlatMap, sqlInTail, true, ctx);
- return ctx.RenameNode(*outerFlatMap,
- outerFlatMap->Content() == "OrderedFlatMap" ? "OrderedFlatMapToEquiJoin" : "FlatMapToEquiJoin");
+ for (const auto& [label, joinKeys]: tableJoinKeys) {
+ if (const auto it = sorteds.find(label); sorteds.cend() != it) {
+ const auto sorted = std::get<const TSortedConstraintNode*>(it->second);
+ const auto chopped = std::get<const TChoppedConstraintNode*>(it->second);
+ if (sorted && sorted->StartsWith(joinKeys) || chopped && chopped->Equals(joinKeys))
+ sorteds.erase(it);
+ }
+ }
}
}
- return {};
+ std::unordered_set<ui32> result(sorteds.size());
+ for (const auto& sort : sorteds)
+ result.emplace(std::get<ui32>(sort.second));
+ return result;
}
TExprNode::TPtr FoldParseAfterSerialize(const TExprNode::TPtr& node, const TStringBuf parseUdfName, const THashSet<TStringBuf>& serializeUdfNames) {
@@ -3215,10 +3222,6 @@ TExprNode::TPtr BuildJsonParse(const TExprNode::TPtr& jsonExpr, TExprContext& ct
.Done().Ptr();
}
-TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) {
- return BuildJsonParse(jsonExpr.Json().Ptr(), ctx);
-}
-
TExprNode::TPtr GetJsonDocumentOrParseJson(const TExprNode::TPtr& jsonExpr, TExprContext& ctx, EDataSlot& argumentDataSlot) {
const TTypeAnnotationNode* type = jsonExpr->GetTypeAnn();
if (type->GetKind() == ETypeAnnotationKind::Optional) {
@@ -3299,7 +3302,12 @@ TExprNode::TPtr BuildJsonCompilePath(const TCoJsonQueryBase& jsonExpr, TExprCont
template<bool Ordered>
TExprNode::TPtr CanonizeMultiMap(const TExprNode::TPtr& node, TExprContext& ctx) {
- if constexpr (!Ordered) {
+ if constexpr (Ordered) {
+ if (node->Head().IsCallable("Unordered")) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return ctx.RenameNode(*node, "MultiMap");
+ }
+ } else {
if (const auto sorted = node->Head().GetConstraint<TSortedConstraintNode>()) {
YQL_CLOG(DEBUG, Core) << node->Content() << " over " << *sorted << ' ' << node->Head().Content();
return ctx.ChangeChild(*node, 0, ctx.NewCallable(node->Head().Pos(), "Unordered", {node->HeadPtr()}));
@@ -3426,6 +3434,8 @@ TExprNode::TPtr OptimizeExtend(const TExprNode::TPtr& node, TExprContext& ctx, T
return node;
}
+} // namespace
+
void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
using namespace std::placeholders;
@@ -3671,6 +3681,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
};
map["OrderedFilter"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
+ if (node->Head().IsCallable("Unordered")) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return ctx.RenameNode(*node, "Filter");
+ }
+
YQL_CLOG(DEBUG, Core) << "Canonize " << node->Content();
return ConvertFilterToFlatmap<TCoOrderedFilter, TCoOrderedFlatMap>(TCoOrderedFilter(node), ctx, optCtx);
};
@@ -3686,6 +3701,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
};
map["OrderedMap"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
+ if (node->Head().IsCallable("Unordered")) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return ctx.RenameNode(*node, "Map");
+ }
+
YQL_CLOG(DEBUG, Core) << "Canonize " << node->Content();
return ConvertMapToFlatmap<TCoOrderedMap, TCoOrderedFlatMap>(TCoOrderedMap(node), ctx);
};
@@ -4682,8 +4702,21 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
return ret;
}
- ui32 inputsCount = node->ChildrenSize() - 2;
- for (ui32 i = 0; i < inputsCount; ++i) {
+ if (const auto indexes = GetUselessSortedJoinInputs(TCoEquiJoin(node)); !indexes.empty()) {
+ YQL_CLOG(DEBUG, Core) << "Suppress order on " << indexes.size() << ' ' << node->Content() << " inputs.";
+ auto children = node->ChildrenList();
+ for (const auto idx : indexes)
+ children[idx] = ctx.Builder(children[idx]->Pos())
+ .List()
+ .Callable(0, "Unordered")
+ .Add(0, children[idx]->HeadPtr())
+ .Seal()
+ .Add(1, children[idx]->TailPtr())
+ .Seal().Build();
+ return ctx.ChangeChildren(*node, std::move(children));
+ }
+
+ for (ui32 i = 0U; i < node->ChildrenSize() - 2U; ++i) {
if (IsListReorder(node->Child(i)->Head())) {
YQL_CLOG(DEBUG, Core) << node->Content() << " with " << node->Child(i)->Content();
return ctx.ChangeChild(*node, i, ctx.ChangeChild(*node->Child(i), 0, node->Child(i)->Head().HeadPtr()));
@@ -4728,12 +4761,12 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
map["AggrCountInit"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
if (node->Head().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Optional || node->Head().IsCallable("Just")) {
YQL_CLOG(DEBUG, Core) << node->Content() << " - 1";
- return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), "1", TNodeFlags::Default) });
+ return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), 1U) });
}
if (node->Head().IsCallable("Nothing")) {
YQL_CLOG(DEBUG, Core) << node->Content() << " - 0";
- return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), "0", TNodeFlags::Default) });
+ return ctx.NewCallable(node->Pos(), "Uint64", { ctx.NewAtom(node->Pos(), 0U) });
}
return node;
@@ -6708,4 +6741,53 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
};
}
+TExprNode::TPtr TryConvertSqlInPredicatesToJoins(const TCoFlatMapBase& flatMap,
+ TShouldConvertSqlInToJoinPredicate shouldConvertSqlInToJoin, TExprContext& ctx, bool prefixOnly)
+{
+ // FlatMap input should be List<Struct<...>> to be accepted as EquiJoin input
+ auto inputType = flatMap.Input().Ref().GetTypeAnn();
+ if (inputType->GetKind() != ETypeAnnotationKind::List ||
+ inputType->Cast<TListExprType>()->GetItemType()->GetKind() != ETypeAnnotationKind::Struct)
+ {
+ return {};
+ }
+
+ TCoLambda lambda = flatMap.Lambda();
+ if (!lambda.Body().Maybe<TCoConditionalValueBase>()) {
+ return {};
+ }
+
+ TCoConditionalValueBase conditional(lambda.Body().Ptr());
+ TPredicateChain chain;
+ auto lambdaArg = lambda.Ptr()->Head().HeadPtr();
+ auto sqlInTail = SplitPredicateChain(conditional.Predicate().Ptr(), lambdaArg, shouldConvertSqlInToJoin, chain, ctx);
+
+ if (!chain.empty()) {
+ if (chain.front().ConvertibleToJoin) {
+ return ConvertSqlInPredicatesPrefixToJoins(flatMap.Ptr(), chain, sqlInTail, ctx);
+ }
+
+ if (sqlInTail && !prefixOnly) {
+ YQL_CLOG(DEBUG, Core) << "FlatMapOverNonJoinableSqlInChain of size " << chain.size();
+ TExprNode::TListType predicates;
+ predicates.reserve(chain.size());
+ for (auto& it : chain) {
+ predicates.emplace_back(std::move(it.Pred));
+ }
+ auto prefixPred = ctx.NewCallable(flatMap.Pos(), "And", std::move(predicates));
+
+ auto innerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), flatMap.Input().Ptr(), prefixPred, false, ctx);
+ auto outerFlatMap = RebuildFlatmapOverPartOfPredicate(flatMap.Ptr(), innerFlatMap, sqlInTail, true, ctx);
+ return ctx.RenameNode(*outerFlatMap,
+ outerFlatMap->Content() == "OrderedFlatMap" ? "OrderedFlatMapToEquiJoin" : "FlatMapToEquiJoin");
+ }
+ }
+
+ return {};
+}
+
+TExprNode::TPtr BuildJsonParse(const TCoJsonQueryBase& jsonExpr, TExprContext& ctx) {
+ return BuildJsonParse(jsonExpr.Json().Ptr(), ctx);
+}
+
} // namespace NYql