diff options
author | vitya-smirnov <[email protected]> | 2025-07-15 17:01:51 +0300 |
---|---|---|
committer | vitya-smirnov <[email protected]> | 2025-07-15 17:19:51 +0300 |
commit | 64137fb0cbe9afe92dca8efc335ef9ff16b78926 (patch) | |
tree | 1f35ca0b330569f9725b3cfc2971a894b65b25f7 /yql/essentials/sql | |
parent | 6fb5119e5d0aee18cf78fb28d7f71c9539cca6dc (diff) |
YQL-20171: Fix aggregation joining key
There was a bug with a aggregation deduplication
by a column at the translator.
For a single column the system joining all
aggregations using the generic key. The
generic key was just a column name without
source name what leads to collision when
aggregating multiple different sources with
same column names.
This patch fixes the generic key by adding a
data source name there. Also tests are added.
commit_hash:1c0a9da512f68c58d2830e096de76b769b733cb2
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- | yql/essentials/sql/v1/aggregation.cpp | 26 | ||||
-rw-r--r-- | yql/essentials/sql/v1/node.cpp | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/node.h | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/source.cpp | 8 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_ut_common.h | 34 |
5 files changed, 63 insertions, 11 deletions
diff --git a/yql/essentials/sql/v1/aggregation.cpp b/yql/essentials/sql/v1/aggregation.cpp index 130ec26d5f7..d7324012c13 100644 --- a/yql/essentials/sql/v1/aggregation.cpp +++ b/yql/essentials/sql/v1/aggregation.cpp @@ -714,8 +714,16 @@ public: {} private: - const TString* GetGenericKey() const final { - return Column_; + TMaybe<TString> GetGenericKey() const final { + if (!Column_) { + return Nothing(); + } + + TStringBuilder key; + if (Source_) { + key << *Source_ << "."; + } + return key << *Column_; } void Join(IAggregation* aggr) final { @@ -736,7 +744,16 @@ private: } if (!isFactory) { - Column_ = exprs.front()->GetColumnName(); + Source_ = Nothing(); + Column_ = Nothing(); + + const auto& expr = exprs.front(); + if (const TString* source = expr->GetSourceName()) { + Source_ = *source; + } + if (const TString* column = expr->GetColumnName()) { + Column_ = *column; + } } if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front()))) @@ -828,7 +845,8 @@ private: TSourcePtr FakeSource_; std::multimap<TString, TNodePtr> Percentiles_; TNodePtr FactoryPercentile_; - const TString* Column_ = nullptr; + TMaybe<TString> Source_; + TMaybe<TString> Column_; }; TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp index 84395eb78cc..77acab8813d 100644 --- a/yql/essentials/sql/v1/node.cpp +++ b/yql/essentials/sql/v1/node.cpp @@ -1709,8 +1709,8 @@ void IAggregation::DoUpdateState() const { State_.Set(ENodeState::OverWindowDistinct, AggMode_ == EAggregateMode::OverWindowDistinct); } -const TString* IAggregation::GetGenericKey() const { - return nullptr; +TMaybe<TString> IAggregation::GetGenericKey() const { + return Nothing(); } void IAggregation::Join(IAggregation*) { diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h index b076d6df6d1..ea10c4a7e3e 100644 --- a/yql/essentials/sql/v1/node.h +++ b/yql/essentials/sql/v1/node.h @@ -974,7 +974,7 @@ namespace NSQLTranslationV1 { void DoUpdateState() const override; - virtual const TString* GetGenericKey() const; + virtual TMaybe<TString> GetGenericKey() const; virtual bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) = 0; diff --git a/yql/essentials/sql/v1/source.cpp b/yql/essentials/sql/v1/source.cpp index 7b313adff13..94ef5c290fd 100644 --- a/yql/essentials/sql/v1/source.cpp +++ b/yql/essentials/sql/v1/source.cpp @@ -582,14 +582,14 @@ std::pair<TNodePtr, bool> ISource::BuildAggregation(const TString& label, TConte std::map<std::pair<bool, TString>, std::vector<IAggregation*>> genericAggrs; for (const auto& aggr: Aggregations_) { - if (const auto key = aggr->GetGenericKey()) { + if (auto key = aggr->GetGenericKey()) { genericAggrs[{aggr->IsDistinct(), *key}].emplace_back(aggr.Get()); } } - for (const auto& aggr : genericAggrs) { - for (size_t i = 1U; i < aggr.second.size(); ++i) { - aggr.second.front()->Join(aggr.second[i]); + for (const auto& [_, aggrs] : genericAggrs) { + for (size_t i = 1; i < aggrs.size(); ++i) { + aggrs.front()->Join(aggrs[i]); } } diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h index 89a1cf4c0d0..f0bc669ea34 100644 --- a/yql/essentials/sql/v1/sql_ut_common.h +++ b/yql/essentials/sql/v1/sql_ut_common.h @@ -8884,3 +8884,37 @@ Y_UNIT_TEST_SUITE(Crashes) { UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); } } + +Y_UNIT_TEST_SUITE(Aggregation) { + + Y_UNIT_TEST(DeduplicationDistinctSources) { + NYql::TAstParseResult res = SqlToYql(R"sql( + SELECT Percentile(a.x, 0.50), Percentile(b.x, 0.75) + FROM plato.Input1 AS a + JOIN plato.Input1 AS b ON a.x == b.x; + )sql"); + + UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + + TWordCountHive count = {{TString("percentile_traits_factory"), 0}}; + VerifyProgram(res, count); + + UNIT_ASSERT_VALUES_EQUAL(2, count["percentile_traits_factory"]); + } + + Y_UNIT_TEST(DeduplicationSameSource) { + NYql::TAstParseResult res = SqlToYql(R"sql( + SELECT Percentile(a.x, 0.50), Percentile(a.x, 0.75) + FROM plato.Input1 AS a + JOIN plato.Input1 AS b ON a.x == b.x; + )sql"); + + UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + + TWordCountHive count = {{TString("percentile_traits_factory"), 0}}; + VerifyProgram(res, count); + + UNIT_ASSERT_VALUES_EQUAL(1, count["percentile_traits_factory"]); + } + +} |