diff options
author | orlovorlov <orlovorlov@yandex-team.com> | 2024-12-11 22:42:01 +0300 |
---|---|---|
committer | orlovorlov <orlovorlov@yandex-team.com> | 2024-12-11 23:15:01 +0300 |
commit | 9869ab73a159e19a462cd2aa22ef2beb126ba11c (patch) | |
tree | acbaf00ecddf4862c354e4afccc879384d521d34 | |
parent | c00ffbc88ae54c7d7867636eb7f136a67452c04b (diff) | |
download | ydb-9869ab73a159e19a462cd2aa22ef2beb126ba11c.tar.gz |
pragma yt.CompactForDistinct
commit_hash:78c474deede40e17d0afc94e3ad796c2ba3f02d7
10 files changed, 132 insertions, 1 deletions
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 9233984d80..47f93caece 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -1833,6 +1833,13 @@ "uri": "https://{canondata_backend}/1937429/434276f26b2857be3c5ad3fdbbf877d2bf775ac5/resource.tar.gz#test_sql2yql.test_aggregate-avg_with_having_/sql.yql" } ], + "test_sql2yql.test[aggregate-compact_distinct]": [ + { + "checksum": "c6aa750c244ef573293be11d07397a54", + "size": 3863, + "uri": "https://{canondata_backend}/1773845/f26b5f394704b0b9c6108eac91165c8420756fb1/resource.tar.gz#test_sql2yql.test_aggregate-compact_distinct_/sql.yql" + } + ], "test_sql2yql.test[aggregate-compare_by]": [ { "checksum": "a9eacab25486aef4e5000b928097e2f7", @@ -2610,6 +2617,13 @@ "uri": "https://{canondata_backend}/1937429/434276f26b2857be3c5ad3fdbbf877d2bf775ac5/resource.tar.gz#test_sql2yql.test_aggregate-native_desc_group_compact_by_/sql.yql" } ], + "test_sql2yql.test[aggregate-no_compact_distinct]": [ + { + "checksum": "7a533d8080711b15f39de0c29270692f", + "size": 3771, + "uri": "https://{canondata_backend}/1899731/184b8df51a2ed58cdacc5419a09c9170dbd7ef88/resource.tar.gz#test_sql2yql.test_aggregate-no_compact_distinct_/sql.yql" + } + ], "test_sql2yql.test[aggregate-null_type]": [ { "checksum": "33f4556d059ae6520e3e360b3a780337", @@ -21259,6 +21273,11 @@ "uri": "file://test_sql_format.test_aggregate-avg_with_having_/formatted.sql" } ], + "test_sql_format.test[aggregate-compact_distinct]": [ + { + "uri": "file://test_sql_format.test_aggregate-compact_distinct_/formatted.sql" + } + ], "test_sql_format.test[aggregate-compare_by]": [ { "uri": "file://test_sql_format.test_aggregate-compare_by_/formatted.sql" @@ -21814,6 +21833,11 @@ "uri": "file://test_sql_format.test_aggregate-native_desc_group_compact_by_/formatted.sql" } ], + "test_sql_format.test[aggregate-no_compact_distinct]": [ + { + "uri": "file://test_sql_format.test_aggregate-no_compact_distinct_/formatted.sql" + } + ], "test_sql_format.test[aggregate-null_type]": [ { "uri": "file://test_sql_format.test_aggregate-null_type_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-compact_distinct_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-compact_distinct_/formatted.sql new file mode 100644 index 0000000000..c59d082159 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-compact_distinct_/formatted.sql @@ -0,0 +1,36 @@ +USE plato; + +PRAGMA AnsiOptionalAs; +PRAGMA yt.CompactForDistinct; + +$x = ( + SELECT + key, + AVG(DISTINCT CAST(subkey AS float)) s + FROM + InputB + GROUP BY + key +); + +$y = ( + SELECT + key, + SUM(CAST(subkey AS float)) s + FROM + InputC + GROUP BY + key +); + +SELECT + x.key, + x.s AS s1, + y.s AS s2 +FROM + $x x +FULL OUTER JOIN + $y y +ON + x.key == y.key +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-no_compact_distinct_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-no_compact_distinct_/formatted.sql new file mode 100644 index 0000000000..96db5d308f --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_aggregate-no_compact_distinct_/formatted.sql @@ -0,0 +1,35 @@ +USE plato; + +PRAGMA AnsiOptionalAs; + +$x = ( + SELECT + key, + AVG(DISTINCT CAST(subkey AS float)) s + FROM + InputB + GROUP BY + key +); + +$y = ( + SELECT + key, + SUM(CAST(subkey AS float)) s + FROM + InputC + GROUP BY + key +); + +SELECT + x.key, + x.s AS s1, + y.s AS s2 +FROM + $x x +FULL OUTER JOIN + $y y +ON + x.key == y.key +; diff --git a/yql/essentials/tests/sql/suites/aggregate/compact_distinct.cfg b/yql/essentials/tests/sql/suites/aggregate/compact_distinct.cfg new file mode 100644 index 0000000000..711689a5cb --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/compact_distinct.cfg @@ -0,0 +1,3 @@ +providers yt +in InputB input2.txt +in InputC input3.txt diff --git a/yql/essentials/tests/sql/suites/aggregate/compact_distinct.sql b/yql/essentials/tests/sql/suites/aggregate/compact_distinct.sql new file mode 100644 index 0000000000..4176ad60aa --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/compact_distinct.sql @@ -0,0 +1,14 @@ +USE plato; + +pragma AnsiOptionalAs; +pragma yt.CompactForDistinct; + +$x = ( + SELECT key, AVG(DISTINCT Cast(subkey as float)) s FROM InputB GROUP BY key +); + +$y = ( + SELECT key, SUM(Cast(subkey as float)) s FROM InputC GROUP BY key +); + +SELECT x.key, x.s AS s1, y.s AS s2 FROM $x x FULL OUTER JOIN $y y ON x.key = y.key; diff --git a/yql/essentials/tests/sql/suites/aggregate/no_compact_distinct.cfg b/yql/essentials/tests/sql/suites/aggregate/no_compact_distinct.cfg new file mode 100644 index 0000000000..711689a5cb --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/no_compact_distinct.cfg @@ -0,0 +1,3 @@ +providers yt +in InputB input2.txt +in InputC input3.txt diff --git a/yql/essentials/tests/sql/suites/aggregate/no_compact_distinct.sql b/yql/essentials/tests/sql/suites/aggregate/no_compact_distinct.sql new file mode 100644 index 0000000000..757c9d55cc --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/no_compact_distinct.sql @@ -0,0 +1,13 @@ +USE plato; + +pragma AnsiOptionalAs; + +$x = ( + SELECT key, AVG(DISTINCT Cast(subkey as float)) s FROM InputB GROUP BY key +); + +$y = ( + SELECT key, SUM(Cast(subkey as float)) s FROM InputC GROUP BY key +); + +SELECT x.key, x.s AS s1, y.s AS s2 FROM $x x FULL OUTER JOIN $y y ON x.key = y.key; diff --git a/yt/yql/providers/yt/common/yql_yt_settings.cpp b/yt/yql/providers/yt/common/yql_yt_settings.cpp index d74a0e6044..c6ea9bc3a3 100644 --- a/yt/yql/providers/yt/common/yql_yt_settings.cpp +++ b/yt/yql/providers/yt/common/yql_yt_settings.cpp @@ -510,6 +510,7 @@ TYtConfiguration::TYtConfiguration(TTypeAnnotationContext& typeCtx) REGISTER_SETTING(*this, ExtendedStatsMaxChunkCount); REGISTER_SETTING(*this, JobBlockInput); REGISTER_SETTING(*this, _EnableYtDqProcessWriteConstraints); + REGISTER_SETTING(*this, CompactForDistinct); } EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings) { diff --git a/yt/yql/providers/yt/common/yql_yt_settings.h b/yt/yql/providers/yt/common/yql_yt_settings.h index 724f9fb7f3..27378b3ddb 100644 --- a/yt/yql/providers/yt/common/yql_yt_settings.h +++ b/yt/yql/providers/yt/common/yql_yt_settings.h @@ -287,6 +287,7 @@ struct TYtSettings { NCommon::TConfSetting<ui64, false> ExtendedStatsMaxChunkCount; NCommon::TConfSetting<bool, false> JobBlockInput; NCommon::TConfSetting<bool, false> _EnableYtDqProcessWriteConstraints; + NCommon::TConfSetting<bool, false> CompactForDistinct; }; EReleaseTempDataMode GetReleaseTempDataMode(const TYtSettings& settings); diff --git a/yt/yql/providers/yt/provider/yql_yt_logical_optimize.cpp b/yt/yql/providers/yt/provider/yql_yt_logical_optimize.cpp index 8577696de2..55c3956a7c 100644 --- a/yt/yql/providers/yt/provider/yql_yt_logical_optimize.cpp +++ b/yt/yql/providers/yt/provider/yql_yt_logical_optimize.cpp @@ -354,7 +354,8 @@ protected: auto usePhases = State_->Configuration->UseAggPhases.Get().GetOrElse(false); auto usePartitionsByKeys = State_->Configuration->UsePartitionsByKeysForFinalAgg.Get().GetOrElse(true); - TAggregateExpander aggExpander(usePartitionsByKeys, false, node.Ptr(), ctx, *State_->Types, false, false, + auto compactForDistinct = State_->Configuration->CompactForDistinct.Get().GetOrElse(false); + TAggregateExpander aggExpander(usePartitionsByKeys, false, node.Ptr(), ctx, *State_->Types, false, compactForDistinct, usePhases, State_->Types->UseBlocks || State_->Types->BlockEngineMode == EBlockEngineMode::Force); return aggExpander.ExpandAggregate(); } |