summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--yql/essentials/core/common_opt/yql_flatmap_over_join.cpp396
-rw-r--r--yql/essentials/tests/sql/minirun/part0/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part7/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part9/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json36
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql94
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql69
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql35
-rw-r--r--yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql50
-rw-r--r--yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql30
-rw-r--r--yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql18
11 files changed, 760 insertions, 10 deletions
diff --git a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
index efc69c5cb84..c2b3da4867a 100644
--- a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
+++ b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
@@ -1,5 +1,4 @@
#include "yql_flatmap_over_join.h"
-#include "yql_co.h"
#include <yql/essentials/core/yql_expr_optimize.h>
#include <yql/essentials/core/yql_expr_type_annotation.h>
@@ -8,6 +7,8 @@
#include <yql/essentials/utils/log/log.h>
+#include <library/cpp/disjoint_sets/disjoint_sets.h>
+
namespace NYql {
using namespace NNodes;
@@ -922,6 +923,373 @@ bool NeedEmitSkipNullMembers(const TTypeAnnotationContext* types) {
return true;
}
+// Reports whether the "EqualityFilterOverJoin" optimizer flag is in effect:
+// the flag must be enabled and must not be disabled at the same time.
+// `types` must not be null.
+bool IsEqualityFilterOverJoinEnabled(const TTypeAnnotationContext* types) {
+    YQL_ENSURE(types);
+    static const char optimizerFlag[] = "EqualityFilterOverJoin";
+    if (!IsOptimizerEnabled<optimizerFlag>(*types)) {
+        return false;
+    }
+    return !IsOptimizerDisabled<optimizerFlag>(*types);
+}
+
+// Equality predicates collected for a single EquiJoin input.
+// HandleEqualityFilterOverJoin combines Preds with "And" and applies them to the input
+// through an OrderedFilter whose lambda argument is Row.
+struct TExtraInputPredicates {
+ // Lambda argument shared by all predicates in Preds; created lazily by AppendEquality.
+ TExprNode::TPtr Row;
+ // Predicates of the form Coalesce(Member(Row, a) == Member(Row, b), false).
+ TExprNodeList Preds;
+ // Name of the column of this input that the other columns were most recently compared against.
+ TString MainColumn;
+};
+
+// Adds the predicate Coalesce(Member(row, left) == Member(row, right), false) to dst.Preds.
+// `left` and `right` are column names that `label` splits into table and column parts.
+// The shared lambda argument dst.Row is created on the first call for the given dst.
+void AppendEquality(TPositionHandle pos, TExtraInputPredicates& dst, const TString& left, const TString& right, const TJoinLabel& label, TExprContext& ctx) {
+    if (!dst.Row) {
+        // No argument yet means no predicate may have been collected yet either.
+        YQL_ENSURE(dst.Preds.empty());
+        dst.Row = ctx.NewArgument(pos, "row");
+    }
+
+    const TStringBuf leftTable = label.TableName(left);
+    const TStringBuf rightTable = label.TableName(right);
+    const TStringBuf leftColumn = label.ColumnName(left);
+    const TStringBuf rightColumn = label.ColumnName(right);
+
+    auto equality = ctx.Builder(pos)
+        .Callable("Coalesce")
+            .Callable(0, "==")
+                .Callable(0, "Member")
+                    .Add(0, dst.Row)
+                    .Atom(1, label.MemberName(leftTable, leftColumn))
+                .Seal()
+                .Callable(1, "Member")
+                    .Add(0, dst.Row)
+                    .Atom(1, label.MemberName(rightTable, rightColumn))
+                .Seal()
+            .Seal()
+            .Add(1, MakeBool<false>(pos, ctx))
+        .Seal()
+        .Build();
+
+    dst.Preds.push_back(std::move(equality));
+}
+
+// Result of RebuildJoinTreeForEquality for one (sub)tree of the join.
+struct TJoinEqRebuildResult {
+ // Rebuilt join tree node; stays null when the tree could not be rebuilt.
+ TExprNode::TPtr JoinTree;
+ // Indices of the inputs collected from the subtree. The side that is not traversed
+ // or is dropped for Semi/Only joins does not contribute here.
+ TSet<ui32> InputsInScope;
+};
+
+// A column that takes part in an equality set.
+struct TEqColumn {
+ // Column name; it is split into table and column parts with SplitTableName when used as a join key.
+ TString Name;
+ // Incremented by RebuildJoinTreeForEquality each time the column is added as a join key.
+ size_t UseCount = 0;
+};
+
+// Returns true when at least one element of `a` is contained in `b`.
+// `a` must be iterable and `b` must provide contains().
+template<typename T, typename U>
+bool HasIntersection(const T& a, const U& b) {
+    for (const auto& item : a) {
+        if (b.contains(item)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Returns the keys (input indices) of `input` that are also present in `scope`,
+// in the iteration order of `input`.
+TVector<ui32> FilterByScope(const TMap<ui32, TEqColumn>& input, const TSet<ui32>& scope) {
+    TVector<ui32> result;
+    // Bind by const reference: a by-value binding would copy every TEqColumn
+    // (including its TString name) only to read the key.
+    for (const auto& [inputIdx, _] : input) {
+        if (scope.contains(inputIdx)) {
+            result.push_back(inputIdx);
+        }
+    }
+    return result;
+}
+
+// Recursively rebuilds an EquiJoin tree so that the column equalities described by
+// equalitySetsByInput become additional join keys.
+//
+// equalitySetsByInput - one map per equality set: input index -> column of that input.
+//                       UseCount of a column is incremented each time it is added as a join key.
+// notNullInputs       - input indices used to strengthen the join type
+//                       (Full -> Left/Right/Inner, Left/Right -> Inner).
+// labels              - resolves the table name of a leaf scope to an input index.
+// joinTree            - the (sub)tree to rebuild.
+//
+// Returns the rebuilt tree together with the input indices in its scope.
+// A default-constructed result (null JoinTree) is returned when this node is an
+// Exclusion join or when one of the subtrees could not be rebuilt.
+TJoinEqRebuildResult RebuildJoinTreeForEquality(TVector<TMap<ui32, TEqColumn>>& equalitySetsByInput, const THashSet<ui32>& notNullInputs, const TJoinLabels& labels, TCoEquiJoinTuple joinTree, TExprContext& ctx) {
+    const TStringBuf joinType = joinTree.Type().Value();
+
+    // The left side is not traversed for RightOnly/RightSemi: it is kept as is
+    // and contributes no inputs to the scope.
+    TJoinEqRebuildResult left;
+    if (joinType != "RightOnly" && joinType != "RightSemi") {
+        if (auto maybeAtom = joinTree.LeftScope().Maybe<TCoAtom>()) {
+            left.JoinTree = joinTree.LeftScope().Ptr();
+            auto inputIdx = labels.FindInputIndex(maybeAtom.Cast().Value());
+            YQL_ENSURE(inputIdx);
+            left.InputsInScope.insert(*inputIdx);
+        } else {
+            left = RebuildJoinTreeForEquality(equalitySetsByInput, notNullInputs, labels, joinTree.LeftScope().Cast<TCoEquiJoinTuple>(), ctx);
+        }
+    } else {
+        left.JoinTree = joinTree.LeftScope().Ptr();
+    }
+
+    // Symmetrically, the right side is not traversed for LeftOnly/LeftSemi.
+    TJoinEqRebuildResult right;
+    if (joinType != "LeftOnly" && joinType != "LeftSemi") {
+        if (auto maybeAtom = joinTree.RightScope().Maybe<TCoAtom>()) {
+            right.JoinTree = joinTree.RightScope().Ptr();
+            auto inputIdx = labels.FindInputIndex(maybeAtom.Cast().Value());
+            YQL_ENSURE(inputIdx);
+            right.InputsInScope.insert(*inputIdx);
+        } else {
+            right = RebuildJoinTreeForEquality(equalitySetsByInput, notNullInputs, labels, joinTree.RightScope().Cast<TCoEquiJoinTuple>(), ctx);
+        }
+    } else {
+        right.JoinTree = joinTree.RightScope().Ptr();
+    }
+
+    YQL_ENSURE(!HasIntersection(left.InputsInScope, right.InputsInScope));
+
+    if (joinType == "Exclusion" || !left.JoinTree || !right.JoinTree) {
+        // TODO: support equality over exclusion join
+        return {};
+    }
+
+    const bool leftNotNull = HasIntersection(left.InputsInScope, notNullInputs);
+    const bool rightNotNull = HasIntersection(right.InputsInScope, notNullInputs);
+
+    // Strengthen the join type according to which sides contain a not-null input.
+    TStringBuf newJoinType = joinType;
+    if (joinType == "Full") {
+        if (leftNotNull && rightNotNull) {
+            newJoinType = "Inner";
+        } else if (leftNotNull) {
+            newJoinType = "Left";
+        } else if (rightNotNull) {
+            newJoinType = "Right";
+        }
+    } else if ((joinType == "Left" && rightNotNull) || (joinType == "Right" && leftNotNull)) {
+        newJoinType = "Inner";
+    }
+
+    TExprNodeList leftKeys = joinTree.LeftKeys().Ref().ChildrenList();
+    TExprNodeList rightKeys = joinTree.RightKeys().Ref().ChildrenList();
+    for (auto& es : equalitySetsByInput) {
+        auto leftInputs = FilterByScope(es, left.InputsInScope);
+        auto rightInputs = FilterByScope(es, right.InputsInScope);
+
+        // Columns of the set are paired positionally: i-th left input with i-th right input.
+        const size_t sz = std::min(leftInputs.size(), rightInputs.size());
+        for (size_t i = 0; i < sz; ++i) {
+            auto lIdx = leftInputs[i];
+            auto rIdx = rightInputs[i];
+            if (es[lIdx].UseCount && es[rIdx].UseCount) {
+                // Both columns already serve as join keys elsewhere in the tree.
+                continue;
+            }
+            es[lIdx].UseCount++;
+            es[rIdx].UseCount++;
+
+            TStringBuf table;
+            TStringBuf column;
+
+            SplitTableName(es[lIdx].Name, table, column);
+            leftKeys.emplace_back(ctx.NewAtom(joinTree.LeftKeys().Pos(), table));
+            leftKeys.emplace_back(ctx.NewAtom(joinTree.LeftKeys().Pos(), column));
+
+            SplitTableName(es[rIdx].Name, table, column);
+            rightKeys.emplace_back(ctx.NewAtom(joinTree.RightKeys().Pos(), table));
+            rightKeys.emplace_back(ctx.NewAtom(joinTree.RightKeys().Pos(), column));
+
+            // A Cross join that received keys is turned into an Inner join.
+            if (newJoinType == "Cross") {
+                newJoinType = "Inner";
+            }
+        }
+    }
+
+    TJoinEqRebuildResult result;
+    result.JoinTree = Build<TCoEquiJoinTuple>(ctx, joinTree.Pos())
+        .Type().Build(newJoinType)
+        .LeftScope(left.JoinTree)
+        .RightScope(right.JoinTree)
+        .LeftKeys(ctx.NewList(joinTree.LeftKeys().Pos(), std::move(leftKeys)))
+        .RightKeys(ctx.NewList(joinTree.RightKeys().Pos(), std::move(rightKeys)))
+        .Options(joinTree.Options())
+        .Done().Ptr();
+    // Only the sides that are kept by the resulting join type stay in scope.
+    if (newJoinType != "RightSemi" && newJoinType != "RightOnly") {
+        result.InputsInScope.insert(left.InputsInScope.begin(), left.InputsInScope.end());
+    }
+    if (newJoinType != "LeftSemi" && newJoinType != "LeftOnly") {
+        result.InputsInScope.insert(right.InputsInScope.begin(), right.InputsInScope.end());
+    }
+    return result;
+}
+
+
+// Moves column equalities from a FlatMap predicate over EquiJoin into the join itself.
+//
+// Top-level "And" components of the form Member(row, a) == Member(row, b) are collected
+// (column names are mapped back through backRenameMap) and grouped into equality sets.
+// Within a set, one column per input is offered to RebuildJoinTreeForEquality as a join key;
+// further columns of the same input are compared with it by a filter pushed to that input.
+// All other predicate components stay in the FlatMap.
+//
+// node          - FlatMap/OrderedFlatMap whose input is an EquiJoin
+// labels        - join labels of the EquiJoin inputs
+// backRenameMap - renamed column name -> original column name
+//
+// Returns the rewritten expression, or `node` itself when no suitable equality was found
+// or the join tree could not be rebuilt.
+TExprBase HandleEqualityFilterOverJoin(const TCoFlatMapBase& node, const TJoinLabels& labels,
+    const THashMap<TString, TString>& backRenameMap, TExprContext& ctx)
+{
+    const auto& row = node.Lambda().Args().Arg(0).Ref();
+    auto predicate = node.Lambda().Body().Ref().ChildPtr(0);
+
+    TExprNodeList andComponents;
+    if (predicate->IsCallable("And")) {
+        andComponents = predicate->ChildrenList();
+    } else {
+        andComponents.push_back(predicate);
+    }
+
+    // Components that are not handled here and remain in the FlatMap predicate.
+    TExprNodeList rest;
+
+    // columns holds both sides of every accepted equality (2 * i and 2 * i + 1);
+    // uniqColumns / column2id enumerate distinct columns for the disjoint-set structure.
+    TVector<TString> columns;
+    TVector<TString> uniqColumns;
+    THashMap<TString, size_t> column2id;
+    THashSet<ui32> makeNoNullInputs;
+    for (auto pred : andComponents) {
+        TExprNode::TPtr left, right;
+        if (!IsEquality(pred, left, right) ||
+            !left->IsCallable("Member") || left->Child(0) != &row ||
+            !right->IsCallable("Member") || right->Child(0) != &row)
+        {
+            rest.push_back(pred);
+            continue;
+        }
+
+        TString leftCol{left->Child(1)->Content()};
+        if (auto it = backRenameMap.find(leftCol); it != backRenameMap.end()) {
+            leftCol = it->second;
+        }
+
+        TString rightCol{right->Child(1)->Content()};
+        if (auto it = backRenameMap.find(rightCol); it != backRenameMap.end()) {
+            rightCol = it->second;
+        }
+
+        if (leftCol == rightCol) {
+            // TODO: add optimizer for "==" over same arguments with optional types
+            rest.push_back(pred);
+            continue;
+        }
+
+        TStringBuf leftTable, rightTable;
+        TStringBuf column;
+        SplitTableName(leftCol, leftTable, column);
+        SplitTableName(rightCol, rightTable, column);
+
+        const auto leftInput = labels.FindInputIndex(leftTable);
+        YQL_ENSURE(leftInput);
+        const auto rightInput = labels.FindInputIndex(rightTable);
+        YQL_ENSURE(rightInput);
+
+        makeNoNullInputs.insert(*leftInput);
+        makeNoNullInputs.insert(*rightInput);
+
+        auto processColumn = [&](const TString& col) {
+            columns.push_back(col);
+            if (column2id.insert({ col, uniqColumns.size() }).second) {
+                uniqColumns.push_back(col);
+            }
+        };
+
+        processColumn(leftCol);
+        processColumn(rightCol);
+    }
+
+    if (columns.empty()) {
+        return node;
+    }
+
+    YQL_ENSURE(columns.size() % 2 == 0);
+
+    // Union the two sides of every equality to obtain sets of mutually equal columns.
+    TDisjointSets ds(uniqColumns.size());
+    for (size_t i = 0; i < columns.size(); i += 2) {
+        ds.UnionSets(column2id[columns[i]], column2id[columns[i + 1]]);
+    }
+
+    TVector<TSet<TString>> equalitySets(uniqColumns.size());
+    for (const auto& col : uniqColumns) {
+        equalitySets[ds.CanonicSetElement(column2id[col])].insert(col);
+    }
+
+    EraseIf(equalitySets, [](const auto& s) { return s.empty(); });
+    YQL_ENSURE(!equalitySets.empty());
+
+    const TCoEquiJoin equiJoin = node.Input().Cast<TCoEquiJoin>();
+    const size_t inputsCount = equiJoin.ArgCount() - 2;
+    YQL_ENSURE(labels.Inputs.size() == inputsCount);
+
+    TVector<TMap<ui32, TEqColumn>> equalitySetsByInput; // single column for each input (other instances are pushed directly to input)
+    TVector<TExtraInputPredicates> extraInputPreds(inputsCount);
+    // Every column that served as the "main" side of a pushed-down equality.
+    // TExtraInputPredicates::MainColumn keeps only the latest one per input, which is
+    // not enough when one input has duplicates in more than one equality set.
+    THashSet<TString> mainColumns;
+
+    for (const TSet<TString>& eqSet : equalitySets) {
+        TMap<ui32, TEqColumn>& eqSetByInput = equalitySetsByInput.emplace_back();
+        for (const auto& col : eqSet) {
+            YQL_ENSURE(!col.empty());
+
+            TStringBuf table;
+            TStringBuf column;
+            SplitTableName(col, table, column);
+            auto idx = labels.FindInputIndex(table);
+            YQL_ENSURE(idx && *idx < inputsCount);
+
+            auto it = eqSetByInput.find(*idx);
+            if (it != eqSetByInput.end()) {
+                // The input already has a column in this set: compare the two inside the input.
+                YQL_ENSURE(col != it->second.Name);
+                const auto& label = labels.Inputs[*idx];
+                extraInputPreds[*idx].MainColumn = it->second.Name;
+                mainColumns.insert(it->second.Name);
+                AppendEquality(predicate->Pos(), extraInputPreds[*idx], it->second.Name, col, label, ctx);
+            } else {
+                eqSetByInput.insert({*idx, {col, 0}});
+            }
+        }
+    }
+
+    auto res = RebuildJoinTreeForEquality(equalitySetsByInput, makeNoNullInputs, labels, equiJoin.Arg(inputsCount).Cast<TCoEquiJoinTuple>(), ctx);
+    if (!res.JoinTree) {
+        return node;
+    }
+
+    // Sanity check: every tracked column was either used as a join key
+    // or was the main column of a predicate pushed to its input.
+    for (const TMap<ui32, TEqColumn>& es : equalitySetsByInput) {
+        for (auto& [_, eqCol] : es) {
+            YQL_ENSURE(eqCol.UseCount || mainColumns.contains(eqCol.Name));
+        }
+    }
+
+    YQL_CLOG(DEBUG, Core) << "Equality filter over EquiJoin: processed " << (columns.size() / 2) << " predicates";
+
+    TExprNodeList equiJoinArgs = equiJoin.Ref().ChildrenList();
+    equiJoinArgs[inputsCount] = res.JoinTree;
+    for (size_t i = 0; i < inputsCount; ++i) {
+        auto& toPush = extraInputPreds[i];
+        if (toPush.Preds.empty()) {
+            continue;
+        }
+        YQL_ENSURE(toPush.Row);
+
+        const auto pos = toPush.Row->Pos();
+        TExprNode::TPtr pred = ctx.NewCallable(pos, "And", std::move(toPush.Preds));
+
+        // Wrap the input list into a filter with the collected same-input equalities.
+        TExprNode::TPtr& inputTuple = equiJoinArgs[i];
+        TExprNode::TPtr oldInput = inputTuple->ChildPtr(TCoEquiJoinInput::idx_List);
+        auto newInput = ctx.Builder(oldInput->Pos())
+            .Callable("OrderedFilter")
+                .Add(0, oldInput)
+                .Add(1, ctx.NewLambda(pos, ctx.NewArguments(pos, { toPush.Row }), std::move(pred)))
+            .Seal()
+            .Build();
+
+        inputTuple = ctx.ChangeChild(*inputTuple, TCoEquiJoinInput::idx_List, std::move(newInput));
+    }
+
+    // Cast the rows of the new join back to the item type of the original join.
+    auto origJoinItemTypeNode = ExpandType(equiJoin.Pos(), *GetSeqItemType(*node.Input().Ref().GetTypeAnn()).Cast<TStructExprType>(), ctx);
+    auto newEquiJoin = ctx.Builder(equiJoin.Pos())
+        .Callable(node.CallableName() == "OrderedFlatMap" ? "OrderedMap" : "Map")
+            .Add(0, ctx.NewCallable(equiJoin.Pos(), "EquiJoin", std::move(equiJoinArgs)))
+            .Lambda(1)
+                .Param("row")
+                .Callable("EnsureType")
+                    .Callable(0, "SafeCast")
+                        .Arg(0, "row")
+                        .Add(1, origJoinItemTypeNode)
+                    .Seal()
+                    .Add(1, origJoinItemTypeNode)
+                    .Atom(2, "Mismatch type while performing Equality over EquiJoin optimizer")
+                .Seal()
+            .Seal()
+        .Seal()
+        .Build();
+
+    if (rest.empty()) {
+        rest.push_back(MakeBool<true>(predicate->Pos(), ctx));
+    }
+
+    YQL_ENSURE(TCoConditionalValueBase::Match(node.Lambda().Body().Raw()));
+    auto newPred = ctx.NewCallable(predicate->Pos(), "And", std::move(rest));
+    auto newCond = ctx.ChangeChild(node.Lambda().Body().Ref(), TCoConditionalValueBase::idx_Predicate, std::move(newPred));
+    auto newLambda = ctx.ChangeChild(node.Lambda().Ref(), TCoLambda::idx_Body, std::move(newCond));
+
+    return TExprBase(ctx.Builder(node.Pos())
+        .Callable(node.CallableName())
+            .Add(0, newEquiJoin)
+            .Lambda(1)
+                .Param("row")
+                .Apply(newLambda)
+                    .With(0, "row")
+                .Seal()
+            .Seal()
+        .Seal()
+        .Build());
+}
+
} // namespace
TExprBase FlatMapOverEquiJoin(
@@ -1046,13 +1414,8 @@ TExprBase FlatMapOverEquiJoin(
}
}
- TExprNode::TListType andTerms;
- bool isPg;
- GatherAndTerms(predicate, andTerms, isPg, ctx);
- TExprNode::TPtr ret;
- TExprNode::TPtr extraPredicate;
- auto joinSettings = equiJoin.Ref().Child(equiJoin.Ref().ChildrenSize() - 1);
- auto renameMap = LoadJoinRenameMap(*joinSettings);
+ const auto joinSettings = equiJoin.Ref().Child(equiJoin.Ref().ChildrenSize() - 1);
+ const auto renameMap = LoadJoinRenameMap(*joinSettings);
THashMap<TString, TString> backRenameMap;
for (auto& x : renameMap) {
if (!x.second.empty()) {
@@ -1062,10 +1425,23 @@ TExprBase FlatMapOverEquiJoin(
}
}
+ if (IsEqualityFilterOverJoinEnabled(types)) {
+ auto newNode = HandleEqualityFilterOverJoin(node, labels, backRenameMap, ctx);
+ if (newNode.Raw() != node.Raw()) {
+ return newNode;
+ }
+ }
+
+ TExprNode::TListType andTerms;
+ bool isPg;
+ GatherAndTerms(predicate, andTerms, isPg, ctx);
+ TExprNode::TPtr ret;
+ TExprNode::TPtr extraPredicate;
+
const bool ordered = node.Maybe<TCoOrderedFlatMap>().IsValid();
const bool skipNulls = NeedEmitSkipNullMembers(types);
- for (auto& andTerm : andTerms) {
+ for (const auto& andTerm : andTerms) {
if (andTerm->IsCallable("Likely")) {
continue;
}
@@ -1101,7 +1477,7 @@ TExprBase FlatMapOverEquiJoin(
}
}
- if (inputs.size() == 2) {
+ if (!IsEqualityFilterOverJoinEnabled(types) && inputs.size() == 2) {
auto newJoin = DecayCrossJoinIntoInner(equiJoin.Ptr(), andTerm,
labels, *inputs.begin(), *(++inputs.begin()), row, backRenameMap, parentsMap, ctx, types->RotateJoinTree);
if (newJoin != equiJoin.Ptr()) {
diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
index bdad61722c1..3c1aa86fec2 100644
--- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
@@ -698,6 +698,20 @@
"uri": "https://{canondata_backend}/995452/c22e7d9867fa56e2ee0c270fded39566aaa63a48/resource.tar.gz#test.test_flexible_types-group_by2-default.txt-Results_/results.txt"
}
],
+ "test.test[join-eq_over_join_bad_rotate-default.txt-Debug]": [
+ {
+ "checksum": "75bc473eee49f48848cfac6902f99607",
+ "size": 1784,
+ "uri": "https://{canondata_backend}/1946324/fbb1b986a8af3f4a5932aa11a3d24263ba935543/resource.tar.gz#test.test_join-eq_over_join_bad_rotate-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-eq_over_join_bad_rotate-default.txt-Results]": [
+ {
+ "checksum": "e8ae895d664f93e239570274b7b66d8d",
+ "size": 7624,
+ "uri": "https://{canondata_backend}/1946324/fbb1b986a8af3f4a5932aa11a3d24263ba935543/resource.tar.gz#test.test_join-eq_over_join_bad_rotate-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[json-json_query/common_syntax-default.txt-Debug]": [
{
"checksum": "f9ce44fdf704adf735b1895d221411f0",
diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
index 6094e839581..399f6d226b1 100644
--- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
@@ -555,6 +555,20 @@
"uri": "https://{canondata_backend}/1937429/ab4dd66771a60c21f698c24d93aafbe26098e494/resource.tar.gz#test.test_in-in_with_nulls_and_optionals_extra_ansi-default.txt-Results_/results.txt"
}
],
+ "test.test[join-eq_over_join_basic-default.txt-Debug]": [
+ {
+ "checksum": "da1603d7d463a99c9b81d07ab562f3c7",
+ "size": 1625,
+ "uri": "https://{canondata_backend}/937458/0493b86f10f9e96f10e8955e2d365f91a1c2a439/resource.tar.gz#test.test_join-eq_over_join_basic-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-eq_over_join_basic-default.txt-Results]": [
+ {
+ "checksum": "ce622e48816b3c6eb5f8255906683831",
+ "size": 5679,
+ "uri": "https://{canondata_backend}/937458/0493b86f10f9e96f10e8955e2d365f91a1c2a439/resource.tar.gz#test.test_join-eq_over_join_basic-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[join-inmem_with_set_key-default.txt-Debug]": [
{
"checksum": "847a008013add2841f187dbf17a8dc0f",
diff --git a/yql/essentials/tests/sql/minirun/part9/canondata/result.json b/yql/essentials/tests/sql/minirun/part9/canondata/result.json
index 7be476f491f..011e1f94a1b 100644
--- a/yql/essentials/tests/sql/minirun/part9/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part9/canondata/result.json
@@ -751,6 +751,20 @@
"uri": "https://{canondata_backend}/1942173/f37f53f2bdf2e81f0ffdb8cf146faecaff60e7af/resource.tar.gz#test.test_in-large_in_YQL-19183--Results_/results.txt"
}
],
+ "test.test[join-eq_over_join_same_keys-default.txt-Debug]": [
+ {
+ "checksum": "d333803a811e7a49abcfacfde8ce6a45",
+ "size": 1023,
+ "uri": "https://{canondata_backend}/1937424/b341ff273ddff0681036174e754c5aaffb3b2e18/resource.tar.gz#test.test_join-eq_over_join_same_keys-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-eq_over_join_same_keys-default.txt-Results]": [
+ {
+ "checksum": "206c052c946dbbcaa9a7fb357139610a",
+ "size": 2220,
+ "uri": "https://{canondata_backend}/1942415/88908f3f1c2c172f76ce2e0c2ad414ba2d95573e/resource.tar.gz#test.test_join-eq_over_join_same_keys-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[join-left_join_with_self_aggr-default.txt-Debug]": [
{
"checksum": "270003dd9cfab29ddd670dfc824c2915",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 5e3d3921af9..96ad2ba4f9d 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -3793,6 +3793,27 @@
"uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_join-cross_join_with_lazy_list_/sql.yql"
}
],
+ "test_sql2yql.test[join-eq_over_join_bad_rotate]": [
+ {
+ "checksum": "47d27e638402a2dc26e78efb0e097629",
+ "size": 7441,
+ "uri": "https://{canondata_backend}/1946324/891082d4c661d16090f3d65c259aac9e885ad06c/resource.tar.gz#test_sql2yql.test_join-eq_over_join_bad_rotate_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[join-eq_over_join_basic]": [
+ {
+ "checksum": "91b591c1976c48ea0a66269257975354",
+ "size": 5943,
+ "uri": "https://{canondata_backend}/937458/9c1b5511bc814fa8de5eebef1b58eb00227de013/resource.tar.gz#test_sql2yql.test_join-eq_over_join_basic_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[join-eq_over_join_same_keys]": [
+ {
+ "checksum": "19517c88b9123deeb88f95937f20b46d",
+ "size": 3745,
+ "uri": "https://{canondata_backend}/1130705/cce783534f6c1bc1ef0dac74b138f4dd17bb6df8/resource.tar.gz#test_sql2yql.test_join-eq_over_join_same_keys_/sql.yql"
+ }
+ ],
"test_sql2yql.test[join-inmem_by_uncomparable_structs]": [
{
"checksum": "800f3ffe362c85dc001eb5237220bfd7",
@@ -10003,6 +10024,21 @@
"uri": "file://test_sql_format.test_join-cross_join_with_lazy_list_/formatted.sql"
}
],
+ "test_sql_format.test[join-eq_over_join_bad_rotate]": [
+ {
+ "uri": "file://test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[join-eq_over_join_basic]": [
+ {
+ "uri": "file://test_sql_format.test_join-eq_over_join_basic_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[join-eq_over_join_same_keys]": [
+ {
+ "uri": "file://test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql"
+ }
+ ],
"test_sql_format.test[join-inmem_by_uncomparable_structs]": [
{
"uri": "file://test_sql_format.test_join-inmem_by_uncomparable_structs_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql
new file mode 100644
index 00000000000..7dd88528dfa
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql
@@ -0,0 +1,94 @@
+PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin');
+PRAGMA AnsiOptionalAs;
+
+-- part of tpcds-6
+$item = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|i_current_price: Just(1.0f), i_category: Just('aaa'), i_item_sk: Just(125l)|>,
+ <|i_current_price: Just(2.0f), i_category: Just('bbb'), i_item_sk: Just(999l)|>,
+ ])
+);
+
+$sub2 = (
+ SELECT
+ i_current_price,
+ i_category
+ FROM
+ $item
+);
+
+$customer_address = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|ca_address_sk: Just(120l)|>,
+ <|ca_address_sk: Just(150l)|>,
+ ])
+);
+
+$customer = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|c_current_addr_sk: Just(150l), c_customer_sk: Just(4l)|>,
+ <|c_current_addr_sk: Just(120l), c_customer_sk: Just(2l)|>,
+ ])
+);
+
+$store_sales = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|ss_sold_date_sk: Just(1l), ss_customer_sk: Just(2l), ss_item_sk: Just(3l)|>,
+ <|ss_sold_date_sk: Just(3l), ss_customer_sk: Just(4l), ss_item_sk: Just(5l)|>,
+ ])
+);
+
+$date_dim = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|d_date_sk: Just(1l)|>,
+ <|d_date_sk: Just(2l)|>,
+ ])
+);
+
+$item = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|i_category: Just('aaa'), i_item_sk: Just(3l)|>,
+ <|i_category: Just('bbb'), i_item_sk: Just(5l)|>,
+ ])
+);
+
+SELECT
+ JoinTableRow() cnt
+FROM
+ $customer_address a
+CROSS JOIN
+ $customer c
+CROSS JOIN
+ $store_sales s
+CROSS JOIN
+ $date_dim d
+CROSS JOIN
+ $item i
+LEFT JOIN
+ $sub2 AS j
+ON
+ i.i_category == j.i_category
+WHERE
+ s.ss_sold_date_sk == d.d_date_sk
+ AND a.ca_address_sk == c.c_current_addr_sk
+ AND c.c_customer_sk == s.ss_customer_sk
+ AND s.ss_item_sk == i.i_item_sk
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql
new file mode 100644
index 00000000000..d3850b024fe
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql
@@ -0,0 +1,69 @@
+PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin');
+
+$a = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|x: Just(1), t: 1, u: 1, extra: 1|>,
+ <|x: 1, t: 1, u: 5, extra: 2|>,
+ ])
+);
+
+$b = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|y: 1|>,
+ <|y: 1|>,
+ ])
+);
+
+$c = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|z: 1|>,
+ <|z: 1|>,
+ ])
+);
+
+$d = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|c: 2, d: 3|>,
+ <|c: 3, d: 3|>,
+ ])
+);
+
+SELECT
+ *
+FROM (
+ SELECT
+ c.z AS cz,
+ b.y AS by,
+ a.u AS au,
+ a.t AS at,
+ a.x AS ax,
+ d.c AS dc,
+ d.d AS dd
+ FROM
+ $a AS a
+ RIGHT JOIN
+ $b AS b
+ ON
+ a.x == b.y
+ CROSS JOIN
+ $d AS d
+ FULL JOIN
+ $c AS c
+ ON
+ b.y == c.z
+)
+WHERE
+ cz == at AND by == au AND ax == by AND dc == dd
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql
new file mode 100644
index 00000000000..657baf30f5c
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql
@@ -0,0 +1,35 @@
+PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin');
+
+$p = 1;
+
+$simpleKey = (
+ SELECT
+ *
+ FROM
+ as_table([<|Key: Just(1), Value: 'qqq'|>, <|Key: Just(2), Value: 'aaa'|>])
+);
+
+$complexKey = (
+ SELECT
+ *
+ FROM
+ as_table([<|Key: Just(2), Fk: 2, Value: 'zzz'|>, <|Key: Just(2), Fk: 3, Value: 'ttt'|>])
+);
+
+SELECT
+ l.Key,
+ l.Fk,
+ l.Value,
+ r.Key,
+ r.Value
+FROM
+ $simpleKey AS r
+INNER JOIN
+ $complexKey AS l
+ON
+ l.Fk == r.Key
+WHERE
+ l.Key == 1 + $p AND l.Key == l.Key
+ORDER BY
+ r.Value
+;
diff --git a/yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql b/yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql
new file mode 100644
index 00000000000..c3d151ccf70
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql
@@ -0,0 +1,50 @@
+pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin");
+
+pragma AnsiOptionalAs;
+
+-- part of tpcds-6
+
+$item = select * from as_table([
+ <|i_current_price:Just(1.0f), i_category:Just("aaa"), i_item_sk:Just(125l)|>,
+ <|i_current_price:Just(2.0f), i_category:Just("bbb"), i_item_sk:Just(999l)|>,
+ ]);
+
+$sub2 = (select i_current_price, i_category from $item);
+
+$customer_address = select * from as_table([
+ <|ca_address_sk:Just(120l)|>,
+ <|ca_address_sk:Just(150l)|>,
+ ]);
+
+$customer = select * from as_table([
+ <|c_current_addr_sk:Just(150l), c_customer_sk:Just(4l)|>,
+ <|c_current_addr_sk:Just(120l), c_customer_sk:Just(2l)|>,
+ ]);
+
+$store_sales = select * from as_table([
+ <|ss_sold_date_sk:Just(1l), ss_customer_sk:Just(2l), ss_item_sk:Just(3l)|>,
+ <|ss_sold_date_sk:Just(3l), ss_customer_sk:Just(4l), ss_item_sk:Just(5l)|>,
+ ]);
+
+$date_dim = select * from as_table([
+ <|d_date_sk:Just(1l)|>,
+ <|d_date_sk:Just(2l)|>,
+ ]);
+
+$item = select * from as_table([
+ <|i_category:Just("aaa"), i_item_sk:Just(3l)|>,
+ <|i_category:Just("bbb"), i_item_sk:Just(5l)|>,
+ ]);
+
+select JoinTableRow() cnt
+from $customer_address a
+ cross join $customer c
+ cross join $store_sales s
+ cross join $date_dim d
+ cross join $item i
+ left join $sub2 as j on i.i_category = j.i_category
+ where
+ s.ss_sold_date_sk = d.d_date_sk
+ and a.ca_address_sk = c.c_current_addr_sk
+ and c.c_customer_sk = s.ss_customer_sk
+ and s.ss_item_sk = i.i_item_sk
diff --git a/yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql b/yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql
new file mode 100644
index 00000000000..83f706fd6a0
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql
@@ -0,0 +1,30 @@
+pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin");
+
+$a = select * from as_table([
+ <|x:Just(1), t:1, u:1, extra:1|>,
+ <|x:1, t:1, u:5, extra:2|>,
+ ]);
+
+$b = select * from as_table([
+ <|y:1|>,
+ <|y:1|>,
+ ]);
+
+$c = select * from as_table([
+ <|z:1|>,
+ <|z:1|>,
+ ]);
+
+$d = select * from as_table([
+ <|c:2, d:3|>,
+ <|c:3, d:3|>,
+ ]);
+
+
+select * from (
+ select c.z as cz, b.y as by, a.u as au, a.t as at, a.x as ax, d.c as dc, d.d as dd from
+ $a as a right join $b as b on a.x=b.y
+ cross join $d as d
+ full join $c as c on b.y = c.z
+)
+where cz = at and by = au and ax = by and dc = dd;
diff --git a/yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql b/yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql
new file mode 100644
index 00000000000..cdd242269d3
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql
@@ -0,0 +1,18 @@
+pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin");
+
+$p = 1;
+
+
+$simpleKey =
+select * from as_table([<|Key:Just(1), Value:"qqq"|>, <|Key:Just(2), Value:"aaa"|>]);
+
+$complexKey =
+select * from as_table([<|Key:Just(2), Fk:2, Value:"zzz"|>, <|Key:Just(2), Fk:3, Value:"ttt"|>]);
+
+
+
+SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM $simpleKey AS r
+INNER JOIN $complexKey AS l
+ ON l.Fk = r.Key
+WHERE l.Key = 1 + $p and l.Key = l.Key
+ORDER BY r.Value