diff options
author | maxkovalev <maxkovalev@yandex-team.com> | 2023-10-06 18:29:08 +0300 |
---|---|---|
committer | maxkovalev <maxkovalev@yandex-team.com> | 2023-10-06 21:12:21 +0300 |
commit | 749a3a22ccd677ae7daa98687ece416b4b7c4f45 (patch) | |
tree | e8d4159c1f00457567cc4051b1172e8877fe0f04 | |
parent | a8f808af42d92f92f79a0473f9c069ce8b2c178b (diff) | |
download | ydb-749a3a22ccd677ae7daa98687ece416b4b7c4f45.tar.gz |
YQL-15441: Set preferred_sort for Aggregate case
YQL-15441: Fill preferred_sort for Aggregate case
5 files changed, 91 insertions, 27 deletions
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp index a45050a92db..2d46507198b 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp @@ -858,41 +858,73 @@ private: } static bool CollectSortSet(const TExprNode& sortNode, TSet<TVector<TStringBuf>>& sortSets) { - YQL_ENSURE(sortNode.IsCallable("Sort")); + if (sortNode.IsCallable("Sort")) { + auto directions = sortNode.ChildPtr(1); - auto directions = sortNode.ChildPtr(1); + auto lambdaArg = sortNode.Child(2)->Child(0)->Child(0); + auto lambdaBody = sortNode.Child(2)->ChildPtr(1); - auto lambdaArg = sortNode.Child(2)->Child(0)->Child(0); - auto lambdaBody = sortNode.Child(2)->ChildPtr(1); + TExprNode::TListType directionItems; + if (directions->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Tuple) { + directionItems = directions->ChildrenList(); + } else { + directionItems.push_back(directions); + } - TExprNode::TListType directionItems; - if (directions->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Tuple) { - directionItems = directions->ChildrenList(); - } else { - directionItems.push_back(directions); - } + if (AnyOf(directionItems, [](const TExprNode::TPtr& direction) { return !IsAscending(*direction); })) { + return false; + } - if (AnyOf(directionItems, [](const TExprNode::TPtr& direction) { return !IsAscending(*direction); })) { - return false; - } + TExprNode::TListType lambdaBodyItems; + if (directions->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Tuple) { + lambdaBodyItems = lambdaBody->ChildrenList(); + } else { + lambdaBodyItems.push_back(lambdaBody); + } - TExprNode::TListType lambdaBodyItems; - if (directions->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Tuple) { - lambdaBodyItems = lambdaBody->ChildrenList(); - } else { - lambdaBodyItems.push_back(lambdaBody); - } + TVector<TStringBuf> sortBy; + for (auto& item : lambdaBodyItems) { + if (!item->IsCallable("Member") || item->Child(0) != lambdaArg) { + return false; + } + YQL_ENSURE(item->Child(1)->IsAtom()); + sortBy.push_back(item->Child(1)->Content()); + } - TVector<TStringBuf> sortBy; - for (auto& item : lambdaBodyItems) { - if (!item->IsCallable("Member") || item->Child(0) != lambdaArg) { + return sortSets.insert(sortBy).second; + } else if (sortNode.IsCallable("Aggregate")) { + if (!HasSetting(TCoAggregate(&sortNode).Settings().Ref(), "compact")) { return false; } - YQL_ENSURE(item->Child(1)->IsAtom()); - sortBy.push_back(item->Child(1)->Content()); - } + auto keys = sortNode.Child(1); + const auto keyNum = keys->ChildrenSize(); + if (keyNum == 0) { + return false; + } + + TVector<TStringBuf> keyList; + keyList.reserve(keys->ChildrenSize()); + + for (const auto& key : keys->ChildrenList()) { + keyList.push_back(key->Content()); + } - return sortSets.insert(sortBy).second; + do { + TVector<TStringBuf> sortBy; + sortBy.reserve(keyNum); + copy(keyList.begin(), keyList.end(), std::back_inserter(sortBy)); + sortSets.insert(sortBy); + if (sortSets.size() > 20) { + YQL_CLOG(WARN, ProviderYt) << __FUNCTION__ << ": join's preferred_sort can't have more than 20 key combinations"; + return true; + } + } while(next_permutation(keyList.begin(), keyList.end())); + sortSets.insert(keyList); + + return true; + } else { + return false; + } } static TExprNode::TPtr CollectPreferredSortsForEquiJoinOutput(TExprBase join, const TExprNode::TPtr& options, @@ -906,7 +938,7 @@ private: TSet<TVector<TStringBuf>> sortSets = LoadJoinSortSets(*options); size_t collected = 0; for (auto& parent : parentsIt->second) { - if (parent->IsCallable("Sort") && CollectSortSet(*parent, sortSets)) { + if (CollectSortSet(*parent, sortSets)) { ++collected; } } diff --git a/ydb/library/yql/tests/sql/suites/join/group_compact_by.cfg b/ydb/library/yql/tests/sql/suites/join/group_compact_by.cfg new file mode 100644 index 00000000000..7697efeaa29 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/join/group_compact_by.cfg @@ -0,0 +1 @@ +in Input input8.txt diff --git a/ydb/library/yql/tests/sql/suites/join/group_compact_by.sql b/ydb/library/yql/tests/sql/suites/join/group_compact_by.sql new file mode 100644 index 00000000000..d5f33612dfd --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/join/group_compact_by.sql @@ -0,0 +1,14 @@ +USE plato; + +PRAGMA yt.JoinMergeForce = "1"; +pragma yt.JoinMergeTablesLimit="10"; + +SELECT key1, subkey1 +FROM + ( + SELECT a.key as key1, a.subkey as subkey1 + FROM (SELECT * FROM Input WHERE subkey != "bar") AS a + JOIN (SELECT * FROM Input WHERE subkey != "foo") AS b + ON a.key = b.key AND a.subkey = b.subkey + ) +GROUP COMPACT BY key1, subkey1; diff --git a/ydb/library/yql/tests/sql/suites/join/input8.txt b/ydb/library/yql/tests/sql/suites/join/input8.txt new file mode 100644 index 00000000000..2417d71bb37 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/join/input8.txt @@ -0,0 +1,4 @@ +{"key"="020";"subkey"="1";"value"="q"}; +{"key"="075";"subkey"="2";"value"="abc"}; +{"key"="150";"subkey"="3";"value"="qzz"}; +{"key"="800";"subkey"="4";"value"="ddd"}; diff --git a/ydb/library/yql/tests/sql/suites/join/input8.txt.attr b/ydb/library/yql/tests/sql/suites/join/input8.txt.attr new file mode 100644 index 00000000000..7ec3f318dcd --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/join/input8.txt.attr @@ -0,0 +1,13 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"String"]]; + ["subkey";["DataType";"String"]]; + ["value";["DataType";"String"]]] + ]; + "SortDirections"=[1;1]; + "SortedBy"=["key";"subkey"]; + "SortedByTypes"=[["DataType";"String"];["DataType";"String"]]; + "SortMembers"=["key";"subkey"]; + } +}
\ No newline at end of file |