summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoraneporada <[email protected]>2023-02-06 11:35:08 +0300
committeraneporada <[email protected]>2023-02-06 11:35:08 +0300
commit732d6740cb6e70e1cbe0dea39977f9d4ea7e1545 (patch)
tree17b3419ff402d3a3f90b8b0f8f8d9619e758a8e0
parentc22ac5f189df2b00d1f889210781b0b010d3e4b5 (diff)
Implement and use unusedKeys hint for EquiJoin
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_simple1.cpp89
-rw-r--r--ydb/library/yql/core/yql_join.cpp57
-rw-r--r--ydb/library/yql/core/yql_join.h3
-rw-r--r--ydb/library/yql/core/yql_opt_utils.cpp16
-rw-r--r--ydb/library/yql/core/yql_opt_utils.h2
5 files changed, 152 insertions, 15 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
index d1d2bacf7c0..8e06f1f87f6 100644
--- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
@@ -291,7 +291,7 @@ TExprNode::TPtr ExpandFlattenEquiJoin(const TExprNode::TPtr& node, TExprContext&
return ctx.NewCallable(node->Pos(), "Map", { std::move(newJoin), std::move(mapLambda) });
}
-void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, THashSet<TString>& keyColumns) {
+void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, TSet<TString>& keyColumns) {
for (ui32 i = 0; i < columns->ChildrenSize(); i += 2) {
auto table = columns->Child(i)->Content();
auto column = columns->Child(i + 1)->Content();
@@ -299,21 +299,82 @@ void GatherEquiJoinKeyColumnsFromEquality(TExprNode::TPtr columns, THashSet<TStr
}
}
-void GatherEquiJoinKeyColumns(TExprNode::TPtr joinTree, THashSet<TString>& keyColumns) {
- auto left = joinTree->Child(1);
+TExprNode::TPtr DoMarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, THashSet<TString>& drops, THashSet<TString>& keyColumns, bool& needRebuild, bool topLevel, TExprContext& ctx) {
+ auto joinKind = joinTree->ChildPtr(0);
+ auto left = joinTree->ChildPtr(1);
+ auto right = joinTree->ChildPtr(2);
+ auto leftColumns = joinTree->Child(3);
+ auto rightColumns = joinTree->Child(4);
+ auto settings = joinTree->ChildPtr(5);
+
+ TSet<TString> unusedKeys;
+
+ TSet<TString> leftKeys;
+ GatherEquiJoinKeyColumnsFromEquality(leftColumns, leftKeys);
+ if (joinKind->Content() != "RightOnly" && joinKind->Content() != "RightSemi") {
+ for (auto& key : leftKeys) {
+ if (drops.contains(key)) {
+ unusedKeys.insert(key);
+ }
+ }
+ }
+ for (auto& key : leftKeys) {
+ drops.erase(key);
+ }
+ keyColumns.insert(leftKeys.begin(), leftKeys.end());
+
+ TSet<TString> rightKeys;
+ GatherEquiJoinKeyColumnsFromEquality(rightColumns, rightKeys);
+ if (joinKind->Content() != "LeftOnly" && joinKind->Content() != "LeftSemi") {
+ for (auto& key : rightKeys) {
+ if (drops.contains(key)) {
+ unusedKeys.insert(key);
+ }
+ }
+ }
+ for (auto& key : rightKeys) {
+ drops.erase(key);
+ }
+ keyColumns.insert(rightKeys.begin(), rightKeys.end());
+
if (!left->IsAtom()) {
- GatherEquiJoinKeyColumns(left, keyColumns);
+ left = DoMarkUnusedKeyColumns(left, drops, keyColumns, needRebuild, false, ctx);
}
- auto right = joinTree->Child(2);
if (!right->IsAtom()) {
- GatherEquiJoinKeyColumns(right, keyColumns);
+ right = DoMarkUnusedKeyColumns(right, drops, keyColumns, needRebuild, false, ctx);
}
- auto leftColumns = joinTree->Child(3);
- auto rightColumns = joinTree->Child(4);
- GatherEquiJoinKeyColumnsFromEquality(leftColumns, keyColumns);
- GatherEquiJoinKeyColumnsFromEquality(rightColumns, keyColumns);
+ TSet<TString> currentUnusedKeys;
+ if (auto setting = GetSetting(*settings, "unusedKeys")) {
+ for (ui32 i = 1; i < setting->ChildrenSize(); ++i) {
+ currentUnusedKeys.insert(ToString(setting->Child(i)->Content()));
+ }
+ }
+
+ if (!topLevel && currentUnusedKeys != unusedKeys) {
+ TExprNodeList settingValues;
+ settingValues.reserve(unusedKeys.size() + 1);
+ settingValues.push_back(ctx.NewAtom(settings->Pos(), "unusedKeys", TNodeFlags::Default));
+ for (auto& key : unusedKeys) {
+ settingValues.push_back(ctx.NewAtom(settings->Pos(), key));
+ }
+ settings = ReplaceSetting(*settings, ctx.NewList(settings->Pos(), std::move(settingValues)), ctx);
+ needRebuild = true;
+ }
+
+ if (needRebuild) {
+ return ctx.NewList(joinTree->Pos(), { joinKind, left, right, leftColumns, rightColumns, settings });
+ }
+
+ return joinTree;
+}
+
+TExprNode::TPtr MarkUnusedKeyColumns(const TExprNode::TPtr& joinTree, const TSet<TString>& drops, THashSet<TString>& keyColumns, TExprContext& ctx) {
+ bool needRebuild = false;
+ bool topLevel = true;
+ THashSet<TString> mutableDrops(drops.begin(), drops.end());
+ return DoMarkUnusedKeyColumns(joinTree, mutableDrops, keyColumns, needRebuild, topLevel, ctx);
}
void GatherDroppedSingleTableColumns(TExprNode::TPtr joinTree, const TJoinLabels& labels, TSet<TString>& drops) {
@@ -378,14 +439,18 @@ TExprNode::TPtr RemoveDeadPayloadColumns(const TExprNode::TPtr& node, TExprConte
}
}
- auto joinTree = node->Child(node->ChildrenSize() - 2);
+ auto joinTree = node->ChildPtr(node->ChildrenSize() - 2);
GatherDroppedSingleTableColumns(joinTree, labels, drops);
if (drops.empty()) {
return node;
}
THashSet<TString> keyColumns;
- GatherEquiJoinKeyColumns(joinTree, keyColumns);
+ if (auto newTree = MarkUnusedKeyColumns(joinTree, drops, keyColumns, ctx); newTree != joinTree) {
+ YQL_CLOG(DEBUG, Core) << "MarkUnusedKeyColumns in EquiJoin";
+ return ctx.ChangeChild(*node, node->ChildrenSize() - 2, std::move(newTree));
+ }
+
for (auto& keyColumn : keyColumns) {
drops.erase(keyColumn);
}
diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp
index af8d698ecf9..9b3b69ce45d 100644
--- a/ydb/library/yql/core/yql_join.cpp
+++ b/ydb/library/yql/core/yql_join.cpp
@@ -272,6 +272,16 @@ namespace {
std::optional<std::unordered_set<std::string_view>> leftHints, rightHints;
bool forceSortedMerge = false;
+ bool unusedKeysOption = false;
+ THashSet<TStringBuf> unusedKeys;
+ THashSet<TString> leftKeySet;
+ for (auto& [table, column] : leftKeys) {
+ leftKeySet.insert(FullColumnName(table, column));
+ }
+ THashSet<TString> rightKeySet;
+ for (auto& [table, column] : rightKeys) {
+ rightKeySet.insert(FullColumnName(table, column));
+ }
for (auto child : linkOptions->Children()) {
if (!EnsureTupleMinSize(*child, 1, ctx)) {
return IGraphTransformer::TStatus::Error;
@@ -321,6 +331,39 @@ namespace {
}
forceSortedMerge = true;
}
+ else if (option.IsAtom("unusedKeys")) {
+ if (unusedKeysOption) {
+ ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
+ "Duplicate " << option.Content() << " link option"));
+ return IGraphTransformer::TStatus::Error;
+ }
+ unusedKeysOption = true;
+ if (cross) {
+ ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
+ "Link option " << option.Content() << " can not be used with CROSS JOIN"));
+ return IGraphTransformer::TStatus::Error;
+ }
+ for (ui32 i = 1; i < child->ChildrenSize(); ++i) {
+ bool isKey = false;
+ TStringBuf unusedKey = child->Child(i)->Content();
+ if (singleSide) {
+ const auto& ks = leftSide ? leftKeySet : rightKeySet;
+ isKey = ks.contains(unusedKey);
+ } else {
+ isKey = leftKeySet.contains(unusedKey) || rightKeySet.contains(unusedKey);
+ }
+ if (!isKey) {
+ ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
+ "Invalid key `" << unusedKey << "` for link option " << option.Content() << ", join type " << joinType.Content()));
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!unusedKeys.insert(unusedKey).second) {
+ ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
+ "Duplicate key `" << unusedKey << "` for link option " << option.Content() ));
+ return IGraphTransformer::TStatus::Error;
+ }
+ }
+ }
else {
ctx.AddError(TIssue(ctx.GetPosition(option.Pos()), TStringBuilder() <<
"Unknown option name: " << option.Content()));
@@ -1338,6 +1381,11 @@ TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings) {
}
result.ForceSortedMerge = HasSetting(linkSettings, "forceSortedMerge");
+ if (auto unusedKeys = GetSetting(linkSettings, "unusedKeys")) {
+ for (ui32 i = 1; i < unusedKeys->ChildrenSize(); ++i) {
+ result.UnusedKeyColumns.insert(ToString(unusedKeys->Child(i)->Content()));
+ }
+ }
return result;
}
@@ -1371,6 +1419,15 @@ TExprNode::TPtr BuildEquiJoinLinkSettings(const TEquiJoinLinkSettings& linkSetti
settings.push_back(builder("right"));
}
+ if (!linkSettings.UnusedKeyColumns.empty()) {
+ TExprNodeList settingItems;
+ settingItems.push_back(ctx.NewAtom(linkSettings.Pos, "unusedKeys", TNodeFlags::Default));
+ for (auto& key : linkSettings.UnusedKeyColumns) {
+ settingItems.push_back(ctx.NewAtom(linkSettings.Pos, key));
+ }
+ settings.push_back(ctx.NewList(linkSettings.Pos, std::move(settingItems)));
+ }
+
return ctx.NewList(linkSettings.Pos, std::move(settings));
}
diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h
index eed03c19734..bade80b4f17 100644
--- a/ydb/library/yql/core/yql_join.h
+++ b/ydb/library/yql/core/yql_join.h
@@ -142,6 +142,9 @@ struct TEquiJoinLinkSettings {
TSet<TString> RightHints;
// JOIN implementation may ignore this flags if SortedMerge strategy is not supported
bool ForceSortedMerge = true;
+ // key columns that are unused after current JOIN - implementation may remove it immediately,
+ // otherwise they will be removed on top level
+ TSet<TString> UnusedKeyColumns;
};
TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings);
diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp
index 34d68d6edfa..e00c7004585 100644
--- a/ydb/library/yql/core/yql_opt_utils.cpp
+++ b/ydb/library/yql/core/yql_opt_utils.cpp
@@ -405,7 +405,13 @@ TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name,
}
TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx) {
+ auto newSetting = value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) });
+ return ReplaceSetting(settings, newSetting, ctx);
+}
+
+TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx) {
TExprNode::TListType newChildren;
+ const TStringBuf name = newSetting->Head().Content();
for (auto setting : settings.Children()) {
if (setting->ChildrenSize() != 0 && setting->Head().Content() == name) {
continue;
@@ -414,21 +420,25 @@ TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, c
newChildren.push_back(setting);
}
- newChildren.push_back(value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) }));
+ newChildren.push_back(newSetting);
auto ret = ctx.NewList(settings.Pos(), std::move(newChildren));
return ret;
}
TExprNode::TPtr AddSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx) {
+ auto newSetting = value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) });
+ return AddSetting(settings, newSetting, ctx);
+}
+
+TExprNode::TPtr AddSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx) {
auto newChildren = settings.ChildrenList();
- newChildren.push_back(value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) }));
+ newChildren.push_back(newSetting);
auto ret = ctx.NewList(settings.Pos(), std::move(newChildren));
return ret;
}
-
TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& settings2, TExprContext& ctx) {
auto newChildren = settings1.ChildrenList();
newChildren.insert(
diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h
index f6ec3cdf973..cd5edca6f3c 100644
--- a/ydb/library/yql/core/yql_opt_utils.h
+++ b/ydb/library/yql/core/yql_opt_utils.h
@@ -48,8 +48,10 @@ bool HasSetting(const TExprNode& settings, const TStringBuf& name);
bool HasAnySetting(const TExprNode& settings, const THashSet<TString>& names);
TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name, TExprContext& ctx);
TExprNode::TPtr AddSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx);
+TExprNode::TPtr AddSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx);
TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& settings2, TExprContext& ctx);
TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx);
+TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx);
TMaybe<TIssue> ParseToDictSettings(const TExprNode& node, TExprContext& ctx, TMaybe<bool>& isMany, TMaybe<bool>& isHashed, TMaybe<ui64>& itemsCount, bool& isCompact);