diff options
author | aneporada <aneporada@ydb.tech> | 2023-02-13 16:41:43 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2023-02-13 16:41:43 +0300 |
commit | b4781eb14f1b19ff7bb90d703ebf9ad470aa1e89 (patch) | |
tree | d936866b36b71430a901462a8bb52a8fcabb64ae | |
parent | 3cdb92a4a42524588d1de86faa3e6fdc05d7001d (diff) | |
download | ydb-b4781eb14f1b19ff7bb90d703ebf9ad470aa1e89.tar.gz |
Move detection of unused join keys to YT provider
It is too expensive (and not needed) to maintain the `unusedKeys` setting inside the EquiJoin node
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 89 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.cpp | 57 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.h | 3 |
3 files changed, 12 insertions, 137 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index ff36f9de63c..193d367d6ff 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -299,7 +299,7 @@ TExprNode::TPtr ExpandFlattenEquiJoin(const TExprNode::TPtr& node, TExprContext& return ctx.NewCallable(node->Pos(), "Map", { std::move(newJoin), std::move(mapLambda) }); } -void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, TSet<TString>& keyColumns) { +void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, THashSet<TString>& keyColumns) { for (ui32 i = 0; i < columns->ChildrenSize(); i += 2) { auto table = columns->Child(i)->Content(); auto column = columns->Child(i + 1)->Content(); @@ -307,82 +307,21 @@ void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, TSet<TString> } } -TExprNode::TPtr DoMarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, THashSet<TString>& drops, THashSet<TString>& keyColumns, bool& needRebuild, bool topLevel, TExprContext& ctx) { - auto joinKind = joinTree->ChildPtr(0); - auto left = joinTree->ChildPtr(1); - auto right = joinTree->ChildPtr(2); - auto leftColumns = joinTree->Child(3); - auto rightColumns = joinTree->Child(4); - auto settings = joinTree->ChildPtr(5); - - TSet<TString> unusedKeys; - - TSet<TString> leftKeys; - GatherEquiJoinKeyColumnsFromEquality(leftColumns, leftKeys); - if (joinKind->Content() != "RightOnly" && joinKind->Content() != "RightSemi") { - for (auto& key : leftKeys) { - if (drops.contains(key)) { - unusedKeys.insert(key); - } - } - } - for (auto& key : leftKeys) { - drops.erase(key); - } - keyColumns.insert(leftKeys.begin(), leftKeys.end()); - - TSet<TString> rightKeys; - GatherEquiJoinKeyColumnsFromEquality(rightColumns, rightKeys); - if (joinKind->Content() != "LeftOnly" && joinKind->Content() != "LeftSemi") { - for (auto& key : rightKeys) { - if (drops.contains(key)) { - 
unusedKeys.insert(key); - } - } - } - for (auto& key : rightKeys) { - drops.erase(key); - } - keyColumns.insert(rightKeys.begin(), rightKeys.end()); - +void GatherEquiJoinKeyColumns(TExprNode::TPtr joinTree, THashSet<TString>& keyColumns) { + auto left = joinTree->Child(1); if (!left->IsAtom()) { - left = DoMarkUnusedKeyColumns(left, drops, keyColumns, needRebuild, false, ctx); + GatherEquiJoinKeyColumns(left, keyColumns); } + auto right = joinTree->Child(2); if (!right->IsAtom()) { - right = DoMarkUnusedKeyColumns(right, drops, keyColumns, needRebuild, false, ctx); - } - - TSet<TString> currentUnusedKeys; - if (auto setting = GetSetting(*settings, "unusedKeys")) { - for (ui32 i = 1; i < setting->ChildrenSize(); ++i) { - currentUnusedKeys.insert(ToString(setting->Child(i)->Content())); - } + GatherEquiJoinKeyColumns(right, keyColumns); } - if (!topLevel && currentUnusedKeys != unusedKeys) { - TExprNodeList settingValues; - settingValues.reserve(unusedKeys.size() + 1); - settingValues.push_back(ctx.NewAtom(settings->Pos(), "unusedKeys", TNodeFlags::Default)); - for (auto& key : unusedKeys) { - settingValues.push_back(ctx.NewAtom(settings->Pos(), key)); - } - settings = ReplaceSetting(*settings, ctx.NewList(settings->Pos(), std::move(settingValues)), ctx); - needRebuild = true; - } - - if (needRebuild) { - return ctx.NewList(joinTree->Pos(), { joinKind, left, right, leftColumns, rightColumns, settings }); - } - - return joinTree; -} - -TExprNode::TPtr MarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, const TSet<TString>& drops, THashSet<TString>& keyColumns, TExprContext& ctx) { - bool needRebuild = false; - bool topLevel = true; - THashSet<TString> mutableDrops(drops.begin(), drops.end()); - return DoMarkUnusedKeyColumns(joinTree, mutableDrops, keyColumns, needRebuild, topLevel, ctx); + auto leftColumns = joinTree->Child(3); + auto rightColumns = joinTree->Child(4); + GatherEquiJoinKeyColumnsFromEquality(leftColumns, keyColumns); + 
GatherEquiJoinKeyColumnsFromEquality(rightColumns, keyColumns); } void GatherDroppedSingleTableColumns(TExprNode::TPtr joinTree, const TJoinLabels& labels, TSet<TString>& drops) { @@ -447,18 +386,14 @@ TExprNode::TPtr RemoveDeadPayloadColumns(const TExprNode::TPtr& node, TExprConte } } - auto joinTree = node->ChildPtr(node->ChildrenSize() - 2); + auto joinTree = node->Child(node->ChildrenSize() - 2); GatherDroppedSingleTableColumns(joinTree, labels, drops); if (drops.empty()) { return node; } THashSet<TString> keyColumns; - if (auto newTree = MarkUnusedKeyColumns(joinTree, drops, keyColumns, ctx); newTree != joinTree) { - YQL_CLOG(DEBUG, Core) << "MarkUnusedKeyColumns in EquiJoin"; - return ctx.ChangeChild(*node, node->ChildrenSize() - 2, std::move(newTree)); - } - + GatherEquiJoinKeyColumns(joinTree, keyColumns); for (auto& keyColumn : keyColumns) { drops.erase(keyColumn); } diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp index 3b39cad7ecb..2a5d8037089 100644 --- a/ydb/library/yql/core/yql_join.cpp +++ b/ydb/library/yql/core/yql_join.cpp @@ -278,16 +278,6 @@ namespace { std::optional<std::unordered_set<std::string_view>> leftHints, rightHints; bool forceSortedMerge = false; - bool unusedKeysOption = false; - THashSet<TStringBuf> unusedKeys; - THashSet<TString> leftKeySet; - for (auto& [table, column] : leftKeys) { - leftKeySet.insert(FullColumnName(table, column)); - } - THashSet<TString> rightKeySet; - for (auto& [table, column] : rightKeys) { - rightKeySet.insert(FullColumnName(table, column)); - } for (auto child : linkOptions->Children()) { if (!EnsureTupleMinSize(*child, 1, ctx)) { return IGraphTransformer::TStatus::Error; @@ -337,39 +327,6 @@ namespace { } forceSortedMerge = true; } - else if (option.IsAtom("unusedKeys")) { - if (unusedKeysOption) { - ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << - "Duplicate " << option.Content() << " link option")); - return IGraphTransformer::TStatus::Error; - } 
- unusedKeysOption = true; - if (cross) { - ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << - "Link option " << option.Content() << " can not be used with CROSS JOIN")); - return IGraphTransformer::TStatus::Error; - } - for (ui32 i = 1; i < child->ChildrenSize(); ++i) { - bool isKey = false; - TStringBuf unusedKey = child->Child(i)->Content(); - if (singleSide) { - const auto& ks = leftSide ? leftKeySet : rightKeySet; - isKey = ks.contains(unusedKey); - } else { - isKey = leftKeySet.contains(unusedKey) || rightKeySet.contains(unusedKey); - } - if (!isKey) { - ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << - "Invalid key `" << unusedKey << "` for link option " << option.Content() << ", join type " << joinType.Content())); - return IGraphTransformer::TStatus::Error; - } - if (!unusedKeys.insert(unusedKey).second) { - ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << - "Duplicate key `" << unusedKey << "` for link option " << option.Content() )); - return IGraphTransformer::TStatus::Error; - } - } - } else { ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() << "Unknown option name: " << option.Content())); @@ -1388,11 +1345,6 @@ TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings) { } result.ForceSortedMerge = HasSetting(linkSettings, "forceSortedMerge"); - if (auto unusedKeys = GetSetting(linkSettings, "unusedKeys")) { - for (ui32 i = 1; i < unusedKeys->ChildrenSize(); ++i) { - result.UnusedKeyColumns.insert(ToString(unusedKeys->Child(i)->Content())); - } - } return result; } @@ -1426,15 +1378,6 @@ TExprNode::TPtr BuildEquiJoinLinkSettings(const TEquiJoinLinkSettings& linkSetti settings.push_back(builder("right")); } - if (!linkSettings.UnusedKeyColumns.empty()) { - TExprNodeList settingItems; - settingItems.push_back(ctx.NewAtom(linkSettings.Pos, "unusedKeys", TNodeFlags::Default)); - for (auto& key : linkSettings.UnusedKeyColumns) { - 
settingItems.push_back(ctx.NewAtom(linkSettings.Pos, key)); - } - settings.push_back(ctx.NewList(linkSettings.Pos, std::move(settingItems))); - } - return ctx.NewList(linkSettings.Pos, std::move(settings)); } diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h index a1428456c0c..f2638f49cf8 100644 --- a/ydb/library/yql/core/yql_join.h +++ b/ydb/library/yql/core/yql_join.h @@ -144,9 +144,6 @@ struct TEquiJoinLinkSettings { TSet<TString> RightHints; // JOIN implementation may ignore this flags if SortedMerge strategy is not supported bool ForceSortedMerge = true; - // key columns that are unused after current JOIN - implementation may remove it immediately, - // otherwise they will be removed on top level - TSet<TString> UnusedKeyColumns; }; TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings); |