diff options
11 files changed, 760 insertions, 10 deletions
diff --git a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp index efc69c5cb84..c2b3da4867a 100644 --- a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp +++ b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp @@ -1,5 +1,4 @@ #include "yql_flatmap_over_join.h" -#include "yql_co.h" #include <yql/essentials/core/yql_expr_optimize.h> #include <yql/essentials/core/yql_expr_type_annotation.h> @@ -8,6 +7,8 @@ #include <yql/essentials/utils/log/log.h> +#include <library/cpp/disjoint_sets/disjoint_sets.h> + namespace NYql { using namespace NNodes; @@ -922,6 +923,373 @@ bool NeedEmitSkipNullMembers(const TTypeAnnotationContext* types) { return true; } +bool IsEqualityFilterOverJoinEnabled(const TTypeAnnotationContext* types) { + YQL_ENSURE(types); + static const char flag[] = "EqualityFilterOverJoin"; + return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types); +} + +struct TExtraInputPredicates { + TExprNode::TPtr Row; + TExprNodeList Preds; + TString MainColumn; +}; + +void AppendEquality(TPositionHandle pos, TExtraInputPredicates& dst, const TString& left, const TString& right, const TJoinLabel& label, TExprContext& ctx) { + if (!dst.Row) { + YQL_ENSURE(dst.Preds.empty()); + dst.Row = ctx.NewArgument(pos, "row"); + } + + TStringBuf lTable = label.TableName(left); + TStringBuf rTable = label.TableName(right); + TStringBuf lColumn = label.ColumnName(left); + TStringBuf rColumn = label.ColumnName(right); + + dst.Preds.push_back(ctx.Builder(pos) + .Callable("Coalesce") + .Callable(0, "==") + .Callable(0, "Member") + .Add(0, dst.Row) + .Atom(1, label.MemberName(lTable, lColumn)) + .Seal() + .Callable(1, "Member") + .Add(0, dst.Row) + .Atom(1, label.MemberName(rTable, rColumn)) + .Seal() + .Seal() + .Add(1, MakeBool<false>(pos, ctx)) + .Seal() + .Build() + ); +} + +struct TJoinEqRebuildResult { + TExprNode::TPtr JoinTree; + TSet<ui32> InputsInScope; +}; + +struct TEqColumn { 
+ TString Name; + size_t UseCount = 0; +}; + +template<typename T, typename U> +bool HasIntersection(const T& a, const U& b) { + return AnyOf(a, [&b](const auto& item) { return b.contains(item); }); +} + +TVector<ui32> FilterByScope(const TMap<ui32, TEqColumn>& input, const TSet<ui32>& scope) { + TVector<ui32> result; + for (auto [i, _] : input) { + if (scope.contains(i)) { + result.push_back(i); + } + } + return result; +} + +TJoinEqRebuildResult RebuildJoinTreeForEquality(TVector<TMap<ui32, TEqColumn>>& equalitySetsByInput, const THashSet<ui32>& notNullInputs, const TJoinLabels& labels, TCoEquiJoinTuple joinTree, TExprContext& ctx) { + const TStringBuf joinType = joinTree.Type().Value(); + + TJoinEqRebuildResult left; + if (joinType != "RightOnly" && joinType != "RightSemi") { + if (auto maybeAtom = joinTree.LeftScope().Maybe<TCoAtom>()) { + left.JoinTree = joinTree.LeftScope().Ptr(); + auto inputIdx = labels.FindInputIndex(maybeAtom.Cast().Value()); + YQL_ENSURE(inputIdx); + left.InputsInScope.insert(*inputIdx); + } else { + left = RebuildJoinTreeForEquality(equalitySetsByInput, notNullInputs, labels, joinTree.LeftScope().Cast<TCoEquiJoinTuple>(), ctx); + } + } else { + left.JoinTree = joinTree.LeftScope().Ptr(); + } + + TJoinEqRebuildResult right; + if (joinType != "LeftOnly" && joinType != "LeftSemi") { + if (auto maybeAtom = joinTree.RightScope().Maybe<TCoAtom>()) { + right.JoinTree = joinTree.RightScope().Ptr(); + auto inputIdx = labels.FindInputIndex(maybeAtom.Cast().Value()); + YQL_ENSURE(inputIdx); + right.InputsInScope.insert(*inputIdx); + } else { + right = RebuildJoinTreeForEquality(equalitySetsByInput, notNullInputs, labels, joinTree.RightScope().Cast<TCoEquiJoinTuple>(), ctx); + } + } else { + right.JoinTree = joinTree.RightScope().Ptr(); + } + + YQL_ENSURE(!HasIntersection(left.InputsInScope, right.InputsInScope)); + + if (joinType == "Exclusion" || !left.JoinTree || !right.JoinTree) { + // TODO: support equality over exclusion join + return {}; + 
} + + const bool leftNotNull = HasIntersection(left.InputsInScope, notNullInputs); + const bool rightNotNull = HasIntersection(right.InputsInScope, notNullInputs); + + TStringBuf newJoinType = joinType; + if (joinType == "Full") { + if (leftNotNull && rightNotNull) { + newJoinType = "Inner"; + } else if (leftNotNull) { + newJoinType = "Left"; + } else if (rightNotNull) { + newJoinType = "Right"; + } + } else if (joinType == "Left" && rightNotNull || joinType == "Right" && leftNotNull) { + newJoinType = "Inner"; + } + + TExprNodeList leftKeys = joinTree.LeftKeys().Ref().ChildrenList(); + TExprNodeList rightKeys = joinTree.RightKeys().Ref().ChildrenList(); + for (auto& es : equalitySetsByInput) { + auto leftInputs = FilterByScope(es, left.InputsInScope); + auto rightInputs = FilterByScope(es, right.InputsInScope); + + const size_t sz = std::min(leftInputs.size(), rightInputs.size()); + for (size_t i = 0; i < sz; ++i) { + auto lIdx = leftInputs[i]; + auto rIdx = rightInputs[i]; + if (es[lIdx].UseCount && es[rIdx].UseCount) { + continue; + } + es[lIdx].UseCount++; + es[rIdx].UseCount++; + + TStringBuf table; + TStringBuf column; + + SplitTableName(es[lIdx].Name, table, column); + leftKeys.emplace_back(ctx.NewAtom(joinTree.LeftKeys().Pos(), table)); + leftKeys.emplace_back(ctx.NewAtom(joinTree.LeftKeys().Pos(), column)); + + SplitTableName(es[rIdx].Name, table, column); + rightKeys.emplace_back(ctx.NewAtom(joinTree.RightKeys().Pos(), table)); + rightKeys.emplace_back(ctx.NewAtom(joinTree.RightKeys().Pos(), column)); + + if (newJoinType == "Cross") { + newJoinType = "Inner"; + } + } + } + + TJoinEqRebuildResult result; + result.JoinTree = Build<TCoEquiJoinTuple>(ctx, joinTree.Pos()) + .Type().Build(newJoinType) + .LeftScope(left.JoinTree) + .RightScope(right.JoinTree) + .LeftKeys(ctx.NewList(joinTree.LeftKeys().Pos(), std::move(leftKeys))) + .RightKeys(ctx.NewList(joinTree.RightKeys().Pos(), std::move(rightKeys))) + .Options(joinTree.Options()) + .Done().Ptr(); + if 
(newJoinType != "RightSemi" && newJoinType != "RightOnly") { + result.InputsInScope.insert(left.InputsInScope.begin(), left.InputsInScope.end()); + } + if (newJoinType != "LeftSemi" && newJoinType != "LeftOnly") { + result.InputsInScope.insert(right.InputsInScope.begin(), right.InputsInScope.end()); + } + return result; +} + + +TExprBase HandleEqualityFilterOverJoin(const TCoFlatMapBase& node, const TJoinLabels& labels, + const THashMap<TString, TString>& backRenameMap, TExprContext& ctx) +{ + const auto& row = node.Lambda().Args().Arg(0).Ref(); + auto predicate = node.Lambda().Body().Ref().ChildPtr(0); + + TExprNodeList andComponents; + if (predicate->IsCallable("And")) { + andComponents = predicate->ChildrenList(); + } else { + andComponents.push_back(predicate); + } + + TExprNodeList rest; + + TVector<TString> columns; + TVector<TString> uniqColumns; + THashMap<TString, size_t> column2id; + THashSet<ui32> makeNoNullInputs; + for (auto pred : andComponents) { + TExprNode::TPtr left, right; + if (!IsEquality(pred, left, right) || + !left->IsCallable("Member") || left->Child(0) != &row || + !right->IsCallable("Member") || right->Child(0) != &row) + { + rest.push_back(pred); + continue; + } + + TString leftCol{left->Child(1)->Content()}; + if (auto it = backRenameMap.find(leftCol); it != backRenameMap.end()) { + leftCol = it->second; + } + + TString rightCol{right->Child(1)->Content()}; + if (auto it = backRenameMap.find(rightCol); it != backRenameMap.end()) { + rightCol = it->second; + } + + if (leftCol == rightCol) { + // TODO: add optimizer for "==" over same arguments with optional types + rest.push_back(pred); + continue; + } + + TStringBuf leftTable, rightTable; + TStringBuf column; + SplitTableName(leftCol, leftTable, column); + SplitTableName(rightCol, rightTable, column); + + const auto leftInput = labels.FindInputIndex(leftTable); + YQL_ENSURE(leftInput); + const auto rightInput = labels.FindInputIndex(rightTable); + YQL_ENSURE(rightInput); + + 
makeNoNullInputs.insert(*leftInput); + makeNoNullInputs.insert(*rightInput); + + auto processColumn = [&](const TString& col) { + columns.push_back(col); + if (column2id.insert({ col, uniqColumns.size() }).second) { + uniqColumns.push_back(col); + } + }; + + processColumn(leftCol); + processColumn(rightCol); + } + + if (columns.empty()) { + return node; + } + + YQL_ENSURE(columns.size() % 2 == 0); + + TDisjointSets ds(uniqColumns.size()); + for (size_t i = 0; i < columns.size(); i += 2) { + ds.UnionSets(column2id[columns[i]], column2id[columns[i + 1]]); + } + + TVector<TSet<TString>> equalitySets(uniqColumns.size()); + for (const auto& col : uniqColumns) { + equalitySets[ds.CanonicSetElement(column2id[col])].insert(col); + } + + EraseIf(equalitySets, [](const auto& s) { return s.empty(); }); + YQL_ENSURE(!equalitySets.empty()); + + const TCoEquiJoin equiJoin = node.Input().Cast<TCoEquiJoin>(); + const size_t inputsCount = equiJoin.ArgCount() - 2; + YQL_ENSURE(labels.Inputs.size() == inputsCount); + + TVector<TMap<ui32, TEqColumn>> equalitySetsByInput; // single column for each input (other instances are pushed directly to input) + TVector<TExtraInputPredicates> extraInputPreds(inputsCount); + + for (const TSet<TString>& eqSet : equalitySets) { + TMap<ui32, TEqColumn>& eqSetByInput = equalitySetsByInput.emplace_back(); + for (const auto& col : eqSet) { + YQL_ENSURE(!col.empty()); + + TStringBuf table; + TStringBuf column; + SplitTableName(col, table, column); + auto idx = labels.FindInputIndex(table); + YQL_ENSURE(idx && *idx < inputsCount); + + auto it = eqSetByInput.find(*idx); + if (it != eqSetByInput.end()) { + YQL_ENSURE(col != it->second.Name); + const auto& label = labels.Inputs[*idx]; + extraInputPreds[*idx].MainColumn = it->second.Name; + AppendEquality(predicate->Pos(), extraInputPreds[*idx], it->second.Name, col, label, ctx); + } else { + eqSetByInput.insert({*idx, {col, 0}}); + } + } + } + + auto res = RebuildJoinTreeForEquality(equalitySetsByInput, 
makeNoNullInputs, labels, equiJoin.Arg(inputsCount).Cast<TCoEquiJoinTuple>(), ctx); + if (!res.JoinTree) { + return node; + } + + for (const TMap<ui32, TEqColumn>& es : equalitySetsByInput) { + for (auto& [_, eqCol] : es) { + YQL_ENSURE(eqCol.UseCount || AnyOf(extraInputPreds, [&](const TExtraInputPredicates& item) { return item.MainColumn == eqCol.Name; } )); + } + } + + YQL_CLOG(DEBUG, Core) << "Equality filter over EquiJoin: processed " << (columns.size() / 2) << " predicates"; + + TExprNodeList equiJoinArgs = equiJoin.Ref().ChildrenList(); + equiJoinArgs[inputsCount] = res.JoinTree; + for (size_t i = 0; i < inputsCount; ++i) { + auto& toPush = extraInputPreds[i]; + if (toPush.Preds.empty()) { + continue; + } + YQL_ENSURE(toPush.Row); + + const auto pos = toPush.Row->Pos(); + TExprNode::TPtr pred = ctx.NewCallable(pos, "And", std::move(toPush.Preds)); + + TExprNode::TPtr& inputTuple = equiJoinArgs[i]; + TExprNode::TPtr oldInput = inputTuple->ChildPtr(TCoEquiJoinInput::idx_List); + auto newInput = ctx.Builder(oldInput->Pos()) + .Callable("OrderedFilter") + .Add(0, oldInput) + .Add(1, ctx.NewLambda(pos, ctx.NewArguments(pos, { toPush.Row }), std::move(pred))) + .Seal() + .Build(); + + inputTuple = ctx.ChangeChild(*inputTuple, TCoEquiJoinInput::idx_List, std::move(newInput)); + } + + auto origJoinItemTypeNode = ExpandType(equiJoin.Pos(), *GetSeqItemType(*node.Input().Ref().GetTypeAnn()).Cast<TStructExprType>(), ctx); + auto newEquiJoin = ctx.Builder(equiJoin.Pos()) + .Callable(node.CallableName() == "OrderedFlatMap" ? 
"OrderedMap" : "Map") + .Add(0, ctx.NewCallable(equiJoin.Pos(), "EquiJoin", std::move(equiJoinArgs))) + .Lambda(1) + .Param("row") + .Callable("EnsureType") + .Callable(0, "SafeCast") + .Arg(0, "row") + .Add(1, origJoinItemTypeNode) + .Seal() + .Add(1, origJoinItemTypeNode) + .Atom(2, "Mismatch type while performing Equality over EquiJoin optimizer") + .Seal() + .Seal() + .Seal() + .Build(); + + if (rest.empty()) { + rest.push_back(MakeBool<true>(predicate->Pos(), ctx)); + } + + YQL_ENSURE(TCoConditionalValueBase::Match(node.Lambda().Body().Raw())); + auto newPred = ctx.NewCallable(predicate->Pos(), "And", std::move(rest)); + auto newCond = ctx.ChangeChild(node.Lambda().Body().Ref(), TCoConditionalValueBase::idx_Predicate, std::move(newPred)); + auto newLambda = ctx.ChangeChild(node.Lambda().Ref(), TCoLambda::idx_Body, std::move(newCond)); + + return TExprBase(ctx.Builder(node.Pos()) + .Callable(node.CallableName()) + .Add(0, newEquiJoin) + .Lambda(1) + .Param("row") + .Apply(newLambda) + .With(0, "row") + .Seal() + .Seal() + .Seal() + .Build()); +} + } // namespace TExprBase FlatMapOverEquiJoin( @@ -1046,13 +1414,8 @@ TExprBase FlatMapOverEquiJoin( } } - TExprNode::TListType andTerms; - bool isPg; - GatherAndTerms(predicate, andTerms, isPg, ctx); - TExprNode::TPtr ret; - TExprNode::TPtr extraPredicate; - auto joinSettings = equiJoin.Ref().Child(equiJoin.Ref().ChildrenSize() - 1); - auto renameMap = LoadJoinRenameMap(*joinSettings); + const auto joinSettings = equiJoin.Ref().Child(equiJoin.Ref().ChildrenSize() - 1); + const auto renameMap = LoadJoinRenameMap(*joinSettings); THashMap<TString, TString> backRenameMap; for (auto& x : renameMap) { if (!x.second.empty()) { @@ -1062,10 +1425,23 @@ TExprBase FlatMapOverEquiJoin( } } + if (IsEqualityFilterOverJoinEnabled(types)) { + auto newNode = HandleEqualityFilterOverJoin(node, labels, backRenameMap, ctx); + if (newNode.Raw() != node.Raw()) { + return newNode; + } + } + + TExprNode::TListType andTerms; + bool isPg; + 
GatherAndTerms(predicate, andTerms, isPg, ctx); + TExprNode::TPtr ret; + TExprNode::TPtr extraPredicate; + const bool ordered = node.Maybe<TCoOrderedFlatMap>().IsValid(); const bool skipNulls = NeedEmitSkipNullMembers(types); - for (auto& andTerm : andTerms) { + for (const auto& andTerm : andTerms) { if (andTerm->IsCallable("Likely")) { continue; } @@ -1101,7 +1477,7 @@ TExprBase FlatMapOverEquiJoin( } } - if (inputs.size() == 2) { + if (!IsEqualityFilterOverJoinEnabled(types) && inputs.size() == 2) { auto newJoin = DecayCrossJoinIntoInner(equiJoin.Ptr(), andTerm, labels, *inputs.begin(), *(++inputs.begin()), row, backRenameMap, parentsMap, ctx, types->RotateJoinTree); if (newJoin != equiJoin.Ptr()) { diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json index bdad61722c1..3c1aa86fec2 100644 --- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json @@ -698,6 +698,20 @@ "uri": "https://{canondata_backend}/995452/c22e7d9867fa56e2ee0c270fded39566aaa63a48/resource.tar.gz#test.test_flexible_types-group_by2-default.txt-Results_/results.txt" } ], + "test.test[join-eq_over_join_bad_rotate-default.txt-Debug]": [ + { + "checksum": "75bc473eee49f48848cfac6902f99607", + "size": 1784, + "uri": "https://{canondata_backend}/1946324/fbb1b986a8af3f4a5932aa11a3d24263ba935543/resource.tar.gz#test.test_join-eq_over_join_bad_rotate-default.txt-Debug_/opt.yql" + } + ], + "test.test[join-eq_over_join_bad_rotate-default.txt-Results]": [ + { + "checksum": "e8ae895d664f93e239570274b7b66d8d", + "size": 7624, + "uri": "https://{canondata_backend}/1946324/fbb1b986a8af3f4a5932aa11a3d24263ba935543/resource.tar.gz#test.test_join-eq_over_join_bad_rotate-default.txt-Results_/results.txt" + } + ], "test.test[json-json_query/common_syntax-default.txt-Debug]": [ { "checksum": "f9ce44fdf704adf735b1895d221411f0", diff --git 
a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json index 6094e839581..399f6d226b1 100644 --- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json @@ -555,6 +555,20 @@ "uri": "https://{canondata_backend}/1937429/ab4dd66771a60c21f698c24d93aafbe26098e494/resource.tar.gz#test.test_in-in_with_nulls_and_optionals_extra_ansi-default.txt-Results_/results.txt" } ], + "test.test[join-eq_over_join_basic-default.txt-Debug]": [ + { + "checksum": "da1603d7d463a99c9b81d07ab562f3c7", + "size": 1625, + "uri": "https://{canondata_backend}/937458/0493b86f10f9e96f10e8955e2d365f91a1c2a439/resource.tar.gz#test.test_join-eq_over_join_basic-default.txt-Debug_/opt.yql" + } + ], + "test.test[join-eq_over_join_basic-default.txt-Results]": [ + { + "checksum": "ce622e48816b3c6eb5f8255906683831", + "size": 5679, + "uri": "https://{canondata_backend}/937458/0493b86f10f9e96f10e8955e2d365f91a1c2a439/resource.tar.gz#test.test_join-eq_over_join_basic-default.txt-Results_/results.txt" + } + ], "test.test[join-inmem_with_set_key-default.txt-Debug]": [ { "checksum": "847a008013add2841f187dbf17a8dc0f", diff --git a/yql/essentials/tests/sql/minirun/part9/canondata/result.json b/yql/essentials/tests/sql/minirun/part9/canondata/result.json index 7be476f491f..011e1f94a1b 100644 --- a/yql/essentials/tests/sql/minirun/part9/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part9/canondata/result.json @@ -751,6 +751,20 @@ "uri": "https://{canondata_backend}/1942173/f37f53f2bdf2e81f0ffdb8cf146faecaff60e7af/resource.tar.gz#test.test_in-large_in_YQL-19183--Results_/results.txt" } ], + "test.test[join-eq_over_join_same_keys-default.txt-Debug]": [ + { + "checksum": "d333803a811e7a49abcfacfde8ce6a45", + "size": 1023, + "uri": 
"https://{canondata_backend}/1937424/b341ff273ddff0681036174e754c5aaffb3b2e18/resource.tar.gz#test.test_join-eq_over_join_same_keys-default.txt-Debug_/opt.yql" + } + ], + "test.test[join-eq_over_join_same_keys-default.txt-Results]": [ + { + "checksum": "206c052c946dbbcaa9a7fb357139610a", + "size": 2220, + "uri": "https://{canondata_backend}/1942415/88908f3f1c2c172f76ce2e0c2ad414ba2d95573e/resource.tar.gz#test.test_join-eq_over_join_same_keys-default.txt-Results_/results.txt" + } + ], "test.test[join-left_join_with_self_aggr-default.txt-Debug]": [ { "checksum": "270003dd9cfab29ddd670dfc824c2915", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 5e3d3921af9..96ad2ba4f9d 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -3793,6 +3793,27 @@ "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_join-cross_join_with_lazy_list_/sql.yql" } ], + "test_sql2yql.test[join-eq_over_join_bad_rotate]": [ + { + "checksum": "47d27e638402a2dc26e78efb0e097629", + "size": 7441, + "uri": "https://{canondata_backend}/1946324/891082d4c661d16090f3d65c259aac9e885ad06c/resource.tar.gz#test_sql2yql.test_join-eq_over_join_bad_rotate_/sql.yql" + } + ], + "test_sql2yql.test[join-eq_over_join_basic]": [ + { + "checksum": "91b591c1976c48ea0a66269257975354", + "size": 5943, + "uri": "https://{canondata_backend}/937458/9c1b5511bc814fa8de5eebef1b58eb00227de013/resource.tar.gz#test_sql2yql.test_join-eq_over_join_basic_/sql.yql" + } + ], + "test_sql2yql.test[join-eq_over_join_same_keys]": [ + { + "checksum": "19517c88b9123deeb88f95937f20b46d", + "size": 3745, + "uri": "https://{canondata_backend}/1130705/cce783534f6c1bc1ef0dac74b138f4dd17bb6df8/resource.tar.gz#test_sql2yql.test_join-eq_over_join_same_keys_/sql.yql" + } + ], 
"test_sql2yql.test[join-inmem_by_uncomparable_structs]": [ { "checksum": "800f3ffe362c85dc001eb5237220bfd7", @@ -10003,6 +10024,21 @@ "uri": "file://test_sql_format.test_join-cross_join_with_lazy_list_/formatted.sql" } ], + "test_sql_format.test[join-eq_over_join_bad_rotate]": [ + { + "uri": "file://test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql" + } + ], + "test_sql_format.test[join-eq_over_join_basic]": [ + { + "uri": "file://test_sql_format.test_join-eq_over_join_basic_/formatted.sql" + } + ], + "test_sql_format.test[join-eq_over_join_same_keys]": [ + { + "uri": "file://test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql" + } + ], "test_sql_format.test[join-inmem_by_uncomparable_structs]": [ { "uri": "file://test_sql_format.test_join-inmem_by_uncomparable_structs_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql new file mode 100644 index 00000000000..7dd88528dfa --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql @@ -0,0 +1,94 @@ +PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin'); +PRAGMA AnsiOptionalAs; + +-- part of tpcds-6 +$item = ( + SELECT + * + FROM + as_table([ + <|i_current_price: Just(1.0f), i_category: Just('aaa'), i_item_sk: Just(125l)|>, + <|i_current_price: Just(2.0f), i_category: Just('bbb'), i_item_sk: Just(999l)|>, + ]) +); + +$sub2 = ( + SELECT + i_current_price, + i_category + FROM + $item +); + +$customer_address = ( + SELECT + * + FROM + as_table([ + <|ca_address_sk: Just(120l)|>, + <|ca_address_sk: Just(150l)|>, + ]) +); + +$customer = ( + SELECT + * + FROM + as_table([ + <|c_current_addr_sk: Just(150l), c_customer_sk: Just(4l)|>, + <|c_current_addr_sk: Just(120l), c_customer_sk: Just(2l)|>, + ]) +); + +$store_sales = ( + SELECT 
+ * + FROM + as_table([ + <|ss_sold_date_sk: Just(1l), ss_customer_sk: Just(2l), ss_item_sk: Just(3l)|>, + <|ss_sold_date_sk: Just(3l), ss_customer_sk: Just(4l), ss_item_sk: Just(5l)|>, + ]) +); + +$date_dim = ( + SELECT + * + FROM + as_table([ + <|d_date_sk: Just(1l)|>, + <|d_date_sk: Just(2l)|>, + ]) +); + +$item = ( + SELECT + * + FROM + as_table([ + <|i_category: Just('aaa'), i_item_sk: Just(3l)|>, + <|i_category: Just('bbb'), i_item_sk: Just(5l)|>, + ]) +); + +SELECT + JoinTableRow() cnt +FROM + $customer_address a +CROSS JOIN + $customer c +CROSS JOIN + $store_sales s +CROSS JOIN + $date_dim d +CROSS JOIN + $item i +LEFT JOIN + $sub2 AS j +ON + i.i_category == j.i_category +WHERE + s.ss_sold_date_sk == d.d_date_sk + AND a.ca_address_sk == c.c_current_addr_sk + AND c.c_customer_sk == s.ss_customer_sk + AND s.ss_item_sk == i.i_item_sk +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql new file mode 100644 index 00000000000..d3850b024fe --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_basic_/formatted.sql @@ -0,0 +1,69 @@ +PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin'); + +$a = ( + SELECT + * + FROM + as_table([ + <|x: Just(1), t: 1, u: 1, extra: 1|>, + <|x: 1, t: 1, u: 5, extra: 2|>, + ]) +); + +$b = ( + SELECT + * + FROM + as_table([ + <|y: 1|>, + <|y: 1|>, + ]) +); + +$c = ( + SELECT + * + FROM + as_table([ + <|z: 1|>, + <|z: 1|>, + ]) +); + +$d = ( + SELECT + * + FROM + as_table([ + <|c: 2, d: 3|>, + <|c: 3, d: 3|>, + ]) +); + +SELECT + * +FROM ( + SELECT + c.z AS cz, + b.y AS by, + a.u AS au, + a.t AS at, + a.x AS ax, + d.c AS dc, + d.d AS dd + FROM + $a AS a + RIGHT JOIN + $b AS b + ON + a.x == b.y + CROSS JOIN + $d AS d + FULL JOIN + $c AS c + ON + b.y == c.z +) +WHERE + cz == at AND by == au AND ax == by AND dc == 
dd +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql new file mode 100644 index 00000000000..657baf30f5c --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql @@ -0,0 +1,35 @@ +PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin'); + +$p = 1; + +$simpleKey = ( + SELECT + * + FROM + as_table([<|Key: Just(1), Value: 'qqq'|>, <|Key: Just(2), Value: 'aaa'|>]) +); + +$complexKey = ( + SELECT + * + FROM + as_table([<|Key: Just(2), Fk: 2, Value: 'zzz'|>, <|Key: Just(2), Fk: 3, Value: 'ttt'|>]) +); + +SELECT + l.Key, + l.Fk, + l.Value, + r.Key, + r.Value +FROM + $simpleKey AS r +INNER JOIN + $complexKey AS l +ON + l.Fk == r.Key +WHERE + l.Key == 1 + $p AND l.Key == l.Key +ORDER BY + r.Value +; diff --git a/yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql b/yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql new file mode 100644 index 00000000000..c3d151ccf70 --- /dev/null +++ b/yql/essentials/tests/sql/suites/join/eq_over_join_bad_rotate.sql @@ -0,0 +1,50 @@ +pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin"); + +pragma AnsiOptionalAs; + +-- part of tpcds-6 + +$item = select * from as_table([ + <|i_current_price:Just(1.0f), i_category:Just("aaa"), i_item_sk:Just(125l)|>, + <|i_current_price:Just(2.0f), i_category:Just("bbb"), i_item_sk:Just(999l)|>, + ]); + +$sub2 = (select i_current_price, i_category from $item); + +$customer_address = select * from as_table([ + <|ca_address_sk:Just(120l)|>, + <|ca_address_sk:Just(150l)|>, + ]); + +$customer = select * from as_table([ + <|c_current_addr_sk:Just(150l), c_customer_sk:Just(4l)|>, + <|c_current_addr_sk:Just(120l), c_customer_sk:Just(2l)|>, + ]); + +$store_sales = select * from as_table([ + <|ss_sold_date_sk:Just(1l), 
ss_customer_sk:Just(2l), ss_item_sk:Just(3l)|>, + <|ss_sold_date_sk:Just(3l), ss_customer_sk:Just(4l), ss_item_sk:Just(5l)|>, + ]); + +$date_dim = select * from as_table([ + <|d_date_sk:Just(1l)|>, + <|d_date_sk:Just(2l)|>, + ]); + +$item = select * from as_table([ + <|i_category:Just("aaa"), i_item_sk:Just(3l)|>, + <|i_category:Just("bbb"), i_item_sk:Just(5l)|>, + ]); + +select JoinTableRow() cnt +from $customer_address a + cross join $customer c + cross join $store_sales s + cross join $date_dim d + cross join $item i + left join $sub2 as j on i.i_category = j.i_category + where + s.ss_sold_date_sk = d.d_date_sk + and a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_item_sk = i.i_item_sk diff --git a/yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql b/yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql new file mode 100644 index 00000000000..83f706fd6a0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/join/eq_over_join_basic.sql @@ -0,0 +1,30 @@ +pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin"); + +$a = select * from as_table([ + <|x:Just(1), t:1, u:1, extra:1|>, + <|x:1, t:1, u:5, extra:2|>, + ]); + +$b = select * from as_table([ + <|y:1|>, + <|y:1|>, + ]); + +$c = select * from as_table([ + <|z:1|>, + <|z:1|>, + ]); + +$d = select * from as_table([ + <|c:2, d:3|>, + <|c:3, d:3|>, + ]); + + +select * from ( + select c.z as cz, b.y as by, a.u as au, a.t as at, a.x as ax, d.c as dc, d.d as dd from + $a as a right join $b as b on a.x=b.y + cross join $d as d + full join $c as c on b.y = c.z +) +where cz = at and by = au and ax = by and dc = dd; diff --git a/yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql b/yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql new file mode 100644 index 00000000000..cdd242269d3 --- /dev/null +++ b/yql/essentials/tests/sql/suites/join/eq_over_join_same_keys.sql @@ -0,0 +1,18 @@ +pragma config.flags("OptimizerFlags", 
"EqualityFilterOverJoin"); + +$p = 1; + + +$simpleKey = +select * from as_table([<|Key:Just(1), Value:"qqq"|>, <|Key:Just(2), Value:"aaa"|>]); + +$complexKey = +select * from as_table([<|Key:Just(2), Fk:2, Value:"zzz"|>, <|Key:Just(2), Fk:3, Value:"ttt"|>]); + + + +SELECT l.Key, l.Fk, l.Value, r.Key, r.Value FROM $simpleKey AS r +INNER JOIN $complexKey AS l + ON l.Fk = r.Key +WHERE l.Key = 1 + $p and l.Key = l.Key +ORDER BY r.Value |
