aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author aneporada <aneporada@ydb.tech> 2023-02-13 16:41:43 +0300
committer aneporada <aneporada@ydb.tech> 2023-02-13 16:41:43 +0300
commit b4781eb14f1b19ff7bb90d703ebf9ad470aa1e89 (patch)
tree d936866b36b71430a901462a8bb52a8fcabb64ae
parent 3cdb92a4a42524588d1de86faa3e6fdc05d7001d (diff)
download ydb-b4781eb14f1b19ff7bb90d703ebf9ad470aa1e89.tar.gz
Move detection of unused join keys to YT provider
It is too expensive (and not needed) to maintain the unusedKeys setting inside the EquiJoin node
-rw-r--r-- ydb/library/yql/core/common_opt/yql_co_simple1.cpp 89
-rw-r--r-- ydb/library/yql/core/yql_join.cpp 57
-rw-r--r-- ydb/library/yql/core/yql_join.h 3
3 files changed, 12 insertions, 137 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
index ff36f9de63c..193d367d6ff 100644
--- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
@@ -299,7 +299,7 @@ TExprNode::TPtr ExpandFlattenEquiJoin(const TExprNode::TPtr& node, TExprContext&
return ctx.NewCallable(node->Pos(), "Map", { std::move(newJoin), std::move(mapLambda) });
}
-void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, TSet<TString>& keyColumns) {
+void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, THashSet<TString>& keyColumns) {
for (ui32 i = 0; i < columns->ChildrenSize(); i += 2) {
auto table = columns->Child(i)->Content();
auto column = columns->Child(i + 1)->Content();
@@ -307,82 +307,21 @@ void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, TSet<TString>
}
}
-TExprNode::TPtr DoMarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, THashSet<TString>& drops, THashSet<TString>& keyColumns, bool& needRebuild, bool topLevel, TExprContext& ctx) {
- auto joinKind = joinTree->ChildPtr(0);
- auto left = joinTree->ChildPtr(1);
- auto right = joinTree->ChildPtr(2);
- auto leftColumns = joinTree->Child(3);
- auto rightColumns = joinTree->Child(4);
- auto settings = joinTree->ChildPtr(5);
-
- TSet<TString> unusedKeys;
-
- TSet<TString> leftKeys;
- GatherEquiJoinKeyColumnsFromEquality(leftColumns, leftKeys);
- if (joinKind->Content() != "RightOnly" && joinKind->Content() != "RightSemi") {
- for (auto& key : leftKeys) {
- if (drops.contains(key)) {
- unusedKeys.insert(key);
- }
- }
- }
- for (auto& key : leftKeys) {
- drops.erase(key);
- }
- keyColumns.insert(leftKeys.begin(), leftKeys.end());
-
- TSet<TString> rightKeys;
- GatherEquiJoinKeyColumnsFromEquality(rightColumns, rightKeys);
- if (joinKind->Content() != "LeftOnly" && joinKind->Content() != "LeftSemi") {
- for (auto& key : rightKeys) {
- if (drops.contains(key)) {
- unusedKeys.insert(key);
- }
- }
- }
- for (auto& key : rightKeys) {
- drops.erase(key);
- }
- keyColumns.insert(rightKeys.begin(), rightKeys.end());
-
+void GatherEquiJoinKeyColumns(TExprNode::TPtr joinTree, THashSet<TString>& keyColumns) {
+ auto left = joinTree->Child(1);
if (!left->IsAtom()) {
- left = DoMarkUnusedKeyColumns(left, drops, keyColumns, needRebuild, false, ctx);
+ GatherEquiJoinKeyColumns(left, keyColumns);
}
+ auto right = joinTree->Child(2);
if (!right->IsAtom()) {
- right = DoMarkUnusedKeyColumns(right, drops, keyColumns, needRebuild, false, ctx);
- }
-
- TSet<TString> currentUnusedKeys;
- if (auto setting = GetSetting(*settings, "unusedKeys")) {
- for (ui32 i = 1; i < setting->ChildrenSize(); ++i) {
- currentUnusedKeys.insert(ToString(setting->Child(i)->Content()));
- }
+ GatherEquiJoinKeyColumns(right, keyColumns);
}
- if (!topLevel && currentUnusedKeys != unusedKeys) {
- TExprNodeList settingValues;
- settingValues.reserve(unusedKeys.size() + 1);
- settingValues.push_back(ctx.NewAtom(settings->Pos(), "unusedKeys", TNodeFlags::Default));
- for (auto& key : unusedKeys) {
- settingValues.push_back(ctx.NewAtom(settings->Pos(), key));
- }
- settings = ReplaceSetting(*settings, ctx.NewList(settings->Pos(), std::move(settingValues)), ctx);
- needRebuild = true;
- }
-
- if (needRebuild) {
- return ctx.NewList(joinTree->Pos(), { joinKind, left, right, leftColumns, rightColumns, settings });
- }
-
- return joinTree;
-}
-
-TExprNode::TPtr MarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, const TSet<TString>& drops, THashSet<TString>& keyColumns, TExprContext& ctx) {
- bool needRebuild = false;
- bool topLevel = true;
- THashSet<TString> mutableDrops(drops.begin(), drops.end());
- return DoMarkUnusedKeyColumns(joinTree, mutableDrops, keyColumns, needRebuild, topLevel, ctx);
+ auto leftColumns = joinTree->Child(3);
+ auto rightColumns = joinTree->Child(4);
+ GatherEquiJoinKeyColumnsFromEquality(leftColumns, keyColumns);
+ GatherEquiJoinKeyColumnsFromEquality(rightColumns, keyColumns);
}
void GatherDroppedSingleTableColumns(TExprNode::TPtr joinTree, const TJoinLabels& labels, TSet<TString>& drops) {
@@ -447,18 +386,14 @@ TExprNode::TPtr RemoveDeadPayloadColumns(const TExprNode::TPtr& node, TExprConte
}
}
- auto joinTree = node->ChildPtr(node->ChildrenSize() - 2);
+ auto joinTree = node->Child(node->ChildrenSize() - 2);
GatherDroppedSingleTableColumns(joinTree, labels, drops);
if (drops.empty()) {
return node;
}
THashSet<TString> keyColumns;
- if (auto newTree = MarkUnusedKeyColumns(joinTree, drops, keyColumns, ctx); newTree != joinTree) {
- YQL_CLOG(DEBUG, Core) << "MarkUnusedKeyColumns in EquiJoin";
- return ctx.ChangeChild(*node, node->ChildrenSize() - 2, std::move(newTree));
- }
-
+ GatherEquiJoinKeyColumns(joinTree, keyColumns);
for (auto& keyColumn : keyColumns) {
drops.erase(keyColumn);
}
diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp
index 3b39cad7ecb..2a5d8037089 100644
--- a/ydb/library/yql/core/yql_join.cpp
+++ b/ydb/library/yql/core/yql_join.cpp
@@ -278,16 +278,6 @@ namespace {
std::optional<std::unordered_set<std::string_view>> leftHints, rightHints;
bool forceSortedMerge = false;
- bool unusedKeysOption = false;
- THashSet<TStringBuf> unusedKeys;
- THashSet<TString> leftKeySet;
- for (auto& [table, column] : leftKeys) {
- leftKeySet.insert(FullColumnName(table, column));
- }
- THashSet<TString> rightKeySet;
- for (auto& [table, column] : rightKeys) {
- rightKeySet.insert(FullColumnName(table, column));
- }
for (auto child : linkOptions->Children()) {
if (!EnsureTupleMinSize(*child, 1, ctx)) {
return IGraphTransformer::TStatus::Error;
@@ -337,39 +327,6 @@ namespace {
}
forceSortedMerge = true;
}
- else if (option.IsAtom("unusedKeys")) {
- if (unusedKeysOption) {
- ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
- "Duplicate " << option.Content() << " link option"));
- return IGraphTransformer::TStatus::Error;
- }
- unusedKeysOption = true;
- if (cross) {
- ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
- "Link option " << option.Content() << " can not be used with CROSS JOIN"));
- return IGraphTransformer::TStatus::Error;
- }
- for (ui32 i = 1; i < child->ChildrenSize(); ++i) {
- bool isKey = false;
- TStringBuf unusedKey = child->Child(i)->Content();
- if (singleSide) {
- const auto& ks = leftSide ? leftKeySet : rightKeySet;
- isKey = ks.contains(unusedKey);
- } else {
- isKey = leftKeySet.contains(unusedKey) || rightKeySet.contains(unusedKey);
- }
- if (!isKey) {
- ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
- "Invalid key `" << unusedKey << "` for link option " << option.Content() << ", join type " << joinType.Content()));
- return IGraphTransformer::TStatus::Error;
- }
- if (!unusedKeys.insert(unusedKey).second) {
- ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
- "Duplicate key `" << unusedKey << "` for link option " << option.Content() ));
- return IGraphTransformer::TStatus::Error;
- }
- }
- }
else {
ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
"Unknown option name: " << option.Content()));
@@ -1388,11 +1345,6 @@ TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings) {
}
result.ForceSortedMerge = HasSetting(linkSettings, "forceSortedMerge");
- if (auto unusedKeys = GetSetting(linkSettings, "unusedKeys")) {
- for (ui32 i = 1; i < unusedKeys->ChildrenSize(); ++i) {
- result.UnusedKeyColumns.insert(ToString(unusedKeys->Child(i)->Content()));
- }
- }
return result;
}
@@ -1426,15 +1378,6 @@ TExprNode::TPtr BuildEquiJoinLinkSettings(const TEquiJoinLinkSettings& linkSetti
settings.push_back(builder("right"));
}
- if (!linkSettings.UnusedKeyColumns.empty()) {
- TExprNodeList settingItems;
- settingItems.push_back(ctx.NewAtom(linkSettings.Pos, "unusedKeys", TNodeFlags::Default));
- for (auto& key : linkSettings.UnusedKeyColumns) {
- settingItems.push_back(ctx.NewAtom(linkSettings.Pos, key));
- }
- settings.push_back(ctx.NewList(linkSettings.Pos, std::move(settingItems)));
- }
-
return ctx.NewList(linkSettings.Pos, std::move(settings));
}
diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h
index a1428456c0c..f2638f49cf8 100644
--- a/ydb/library/yql/core/yql_join.h
+++ b/ydb/library/yql/core/yql_join.h
@@ -144,9 +144,6 @@ struct TEquiJoinLinkSettings {
TSet<TString> RightHints;
// JOIN implementation may ignore this flags if SortedMerge strategy is not supported
bool ForceSortedMerge = true;
- // key columns that are unused after current JOIN - implementation may remove it immediately,
- // otherwise they will be removed on top level
- TSet<TString> UnusedKeyColumns;
};
TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings);