diff options
author | aneporada <[email protected]> | 2023-02-06 11:35:08 +0300 |
---|---|---|
committer | aneporada <[email protected]> | 2023-02-06 11:35:08 +0300 |
commit | 732d6740cb6e70e1cbe0dea39977f9d4ea7e1545 (patch) | |
tree | 17b3419ff402d3a3f90b8b0f8f8d9619e758a8e0 | |
parent | c22ac5f189df2b00d1f889210781b0b010d3e4b5 (diff) |
Implement and use unusedKeys hint for EquiJoin
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 89 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.cpp | 57 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.h | 3 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_opt_utils.cpp | 16 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_opt_utils.h | 2 |
5 files changed, 152 insertions, 15 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index d1d2bacf7c0..8e06f1f87f6 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -291,7 +291,7 @@ TExprNode::TPtr ExpandFlattenEquiJoin(const TExprNode::TPtr& node, TExprContext& return ctx.NewCallable(node->Pos(), "Map", { std::move(newJoin), std::move(mapLambda) }); } -void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, THashSet<TString>& keyColumns) { +void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, TSet<TString>& keyColumns) { for (ui32 i = 0; i < columns->ChildrenSize(); i += 2) { auto table = columns->Child(i)->Content(); auto column = columns->Child(i + 1)->Content(); @@ -299,21 +299,82 @@ void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, THashSet<TStr } } -void GatherEquiJoinKeyColumns(TExprNode::TPtr joinTree, THashSet<TString>& keyColumns) { - auto left = joinTree->Child(1); +TExprNode::TPtr DoMarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, THashSet<TString>& drops, THashSet<TString>& keyColumns, bool& needRebuild, bool topLevel, TExprContext& ctx) { + auto joinKind = joinTree->ChildPtr(0); + auto left = joinTree->ChildPtr(1); + auto right = joinTree->ChildPtr(2); + auto leftColumns = joinTree->Child(3); + auto rightColumns = joinTree->Child(4); + auto settings = joinTree->ChildPtr(5); + + TSet<TString> unusedKeys; + + TSet<TString> leftKeys; + GatherEquiJoinKeyColumnsFromEquality(leftColumns, leftKeys); + if (joinKind->Content() != "RightOnly" && joinKind->Content() != "RightSemi") { + for (auto& key : leftKeys) { + if (drops.contains(key)) { + unusedKeys.insert(key); + } + } + } + for (auto& key : leftKeys) { + drops.erase(key); + } + keyColumns.insert(leftKeys.begin(), leftKeys.end()); + + TSet<TString> rightKeys; + GatherEquiJoinKeyColumnsFromEquality(rightColumns, rightKeys); + if (joinKind->Content() != "LeftOnly" && joinKind->Content() != "LeftSemi") { + for (auto& key : rightKeys) { + if (drops.contains(key)) { + unusedKeys.insert(key); + } + } + } + for (auto& key : rightKeys) { + drops.erase(key); + } + keyColumns.insert(rightKeys.begin(), rightKeys.end()); + if (!left->IsAtom()) { - GatherEquiJoinKeyColumns(left, keyColumns); + left = DoMarkUnusedKeyColumns(left, drops, keyColumns, needRebuild, false, ctx); } - auto right = joinTree->Child(2); if (!right->IsAtom()) { - GatherEquiJoinKeyColumns(right, keyColumns); + right = DoMarkUnusedKeyColumns(right, drops, keyColumns, needRebuild, false, ctx); } - auto leftColumns = joinTree->Child(3); - auto rightColumns = joinTree->Child(4); - GatherEquiJoinKeyColumnsFromEquality(leftColumns, keyColumns); - GatherEquiJoinKeyColumnsFromEquality(rightColumns, keyColumns); + TSet<TString> currentUnusedKeys; + if (auto setting = GetSetting(*settings, "unusedKeys")) { + for (ui32 i = 1; i < setting->ChildrenSize(); ++i) { + currentUnusedKeys.insert(ToString(setting->Child(i)->Content())); + } + } + + if (!topLevel && currentUnusedKeys != unusedKeys) { + TExprNodeList settingValues; + settingValues.reserve(unusedKeys.size() + 1); + settingValues.push_back(ctx.NewAtom(settings->Pos(), "unusedKeys", TNodeFlags::Default)); + for (auto& key : unusedKeys) { + settingValues.push_back(ctx.NewAtom(settings->Pos(), key)); + } + settings = ReplaceSetting(*settings, ctx.NewList(settings->Pos(), std::move(settingValues)), ctx); + needRebuild = true; + } + + if (needRebuild) { + return ctx.NewList(joinTree->Pos(), { joinKind, left, right, leftColumns, rightColumns, settings }); + } + + return joinTree; +} + +TExprNode::TPtr MarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, const TSet<TString>& drops, THashSet<TString>& keyColumns, TExprContext& ctx) { + bool needRebuild = false; + bool topLevel = true; + THashSet<TString> mutableDrops(drops.begin(), drops.end()); + return DoMarkUnusedKeyColumns(joinTree, mutableDrops, keyColumns, needRebuild, topLevel, ctx); } void GatherDroppedSingleTableColumns(TExprNode::TPtr joinTree, const TJoinLabels& labels, TSet<TString>& drops) { @@ -378,14 +439,18 @@ TExprNode::TPtr RemoveDeadPayloadColumns(const TExprNode::TPtr& node, TExprConte } } - auto joinTree = node->Child(node->ChildrenSize() - 2); + auto joinTree = node->ChildPtr(node->ChildrenSize() - 2); GatherDroppedSingleTableColumns(joinTree, labels, drops); if (drops.empty()) { return node; } THashSet<TString> keyColumns; - GatherEquiJoinKeyColumns(joinTree, keyColumns); + if (auto newTree = MarkUnusedKeyColumns(joinTree, drops, keyColumns, ctx); newTree != joinTree) { + YQL_CLOG(DEBUG, Core) << "MarkUnusedKeyColumns in EquiJoin"; + return ctx.ChangeChild(*node, node->ChildrenSize() - 2, std::move(newTree)); + } + for (auto& keyColumn : keyColumns) { drops.erase(keyColumn); } diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp index af8d698ecf9..9b3b69ce45d 100644 --- a/ydb/library/yql/core/yql_join.cpp +++ b/ydb/library/yql/core/yql_join.cpp @@ -272,6 +272,16 @@ namespace { std::optional<std::unordered_set<std::string_view>> leftHints, rightHints; bool forceSortedMerge = false; + bool unusedKeysOption = false; + THashSet<TStringBuf> unusedKeys; + THashSet<TString> leftKeySet; + for (auto& [table, column] : leftKeys) { + leftKeySet.insert(FullColumnName(table, column)); + } + THashSet<TString> rightKeySet; + for (auto& [table, column] : rightKeys) { + rightKeySet.insert(FullColumnName(table, column)); + } for (auto child : linkOptions->Children()) { if (!EnsureTupleMinSize(*child, 1, ctx)) { return IGraphTransformer::TStatus::Error; @@ -321,6 +331,39 @@ namespace { } forceSortedMerge = true; } + else if (option.IsAtom("unusedKeys")) { + if (unusedKeysOption) { + ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << + "Duplicate " << option.Content() << " link option")); + return IGraphTransformer::TStatus::Error; + } + unusedKeysOption = true; + if (cross) { + ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << + "Link option " << option.Content() << " can not be used with CROSS JOIN")); + return IGraphTransformer::TStatus::Error; + } + for (ui32 i = 1; i < child->ChildrenSize(); ++i) { + bool isKey = false; + TStringBuf unusedKey = child->Child(i)->Content(); + if (singleSide) { + const auto& ks = leftSide ? leftKeySet : rightKeySet; + isKey = ks.contains(unusedKey); + } else { + isKey = leftKeySet.contains(unusedKey) || rightKeySet.contains(unusedKey); + } + if (!isKey) { + ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << + "Invalid key `" << unusedKey << "` for link option " << option.Content() << ", join type " << joinType.Content())); + return IGraphTransformer::TStatus::Error; + } + if (!unusedKeys.insert(unusedKey).second) { + ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << + "Duplicate key `" << unusedKey << "` for link option " << option.Content() )); + return IGraphTransformer::TStatus::Error; + } + } + } else { ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << "Unknown option name: " << option.Content())); @@ -1338,6 +1381,11 @@ TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings) { } result.ForceSortedMerge = HasSetting(linkSettings, "forceSortedMerge"); + if (auto unusedKeys = GetSetting(linkSettings, "unusedKeys")) { + for (ui32 i = 1; i < unusedKeys->ChildrenSize(); ++i) { + result.UnusedKeyColumns.insert(ToString(unusedKeys->Child(i)->Content())); + } + } return result; } @@ -1371,6 +1419,15 @@ TExprNode::TPtr BuildEquiJoinLinkSettings(const TEquiJoinLinkSettings& linkSetti settings.push_back(builder("right")); } + if (!linkSettings.UnusedKeyColumns.empty()) { + TExprNodeList settingItems; + settingItems.push_back(ctx.NewAtom(linkSettings.Pos, "unusedKeys", TNodeFlags::Default)); + for (auto& key : linkSettings.UnusedKeyColumns) { + settingItems.push_back(ctx.NewAtom(linkSettings.Pos, key)); + } + settings.push_back(ctx.NewList(linkSettings.Pos, std::move(settingItems))); + } + return ctx.NewList(linkSettings.Pos, std::move(settings)); } diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h index eed03c19734..bade80b4f17 100644 --- a/ydb/library/yql/core/yql_join.h +++ b/ydb/library/yql/core/yql_join.h @@ -142,6 +142,9 @@ struct TEquiJoinLinkSettings { TSet<TString> RightHints; // JOIN implementation may ignore this flags if SortedMerge strategy is not supported bool ForceSortedMerge = true; + // key columns that are unused after current JOIN - implementation may remove it immediately, + // otherwise they will be removed on top level + TSet<TString> UnusedKeyColumns; }; TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings); diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index 34d68d6edfa..e00c7004585 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -405,7 +405,13 @@ TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name, } TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx) { + auto newSetting = value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) }); + return ReplaceSetting(settings, newSetting, ctx); +} + +TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx) { TExprNode::TListType newChildren; + const TStringBuf name = newSetting->Head().Content(); for (auto setting : settings.Children()) { if (setting->ChildrenSize() != 0 && setting->Head().Content() == name) { continue; @@ -414,21 +420,25 @@ TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, c newChildren.push_back(setting); } - newChildren.push_back(value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) })); + newChildren.push_back(newSetting); auto ret = ctx.NewList(settings.Pos(), std::move(newChildren)); return ret; } TExprNode::TPtr AddSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx) { + auto newSetting = value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) }); + return AddSetting(settings, newSetting, ctx); +} + +TExprNode::TPtr AddSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx) { auto newChildren = settings.ChildrenList(); - newChildren.push_back(value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) })); + newChildren.push_back(newSetting); auto ret = ctx.NewList(settings.Pos(), std::move(newChildren)); return ret; } - TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& settings2, TExprContext& ctx) { auto newChildren = settings1.ChildrenList(); newChildren.insert( diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h index f6ec3cdf973..cd5edca6f3c 100644 --- a/ydb/library/yql/core/yql_opt_utils.h +++ b/ydb/library/yql/core/yql_opt_utils.h @@ -48,8 +48,10 @@ bool HasSetting(const TExprNode& settings, const TStringBuf& name); bool HasAnySetting(const TExprNode& settings, const THashSet<TString>& names); TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name, TExprContext& ctx); TExprNode::TPtr AddSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx); +TExprNode::TPtr AddSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx); TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& settings2, TExprContext& ctx); TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx); +TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx); TMaybe<TIssue> ParseToDictSettings(const TExprNode& node, TExprContext& ctx, TMaybe<bool>& isMany, TMaybe<bool>& isHashed, TMaybe<ui64>& itemsCount, bool& isCompact); |