summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql
diff options
context:
space:
mode:
authorvitya-smirnov <[email protected]>2025-07-15 17:01:51 +0300
committervitya-smirnov <[email protected]>2025-07-15 17:19:51 +0300
commit64137fb0cbe9afe92dca8efc335ef9ff16b78926 (patch)
tree1f35ca0b330569f9725b3cfc2971a894b65b25f7 /yql/essentials/sql
parent6fb5119e5d0aee18cf78fb28d7f71c9539cca6dc (diff)
YQL-20171: Fix aggregation joining key
There was a bug with a aggregation deduplication by a column at the translator. For a single column the system joining all aggregations using the generic key. The generic key was just a column name without source name what leads to collision when aggregating multiple different sources with same column names. This patch fixes the generic key by adding a data source name there. Also tests are added. commit_hash:1c0a9da512f68c58d2830e096de76b769b733cb2
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r--yql/essentials/sql/v1/aggregation.cpp26
-rw-r--r--yql/essentials/sql/v1/node.cpp4
-rw-r--r--yql/essentials/sql/v1/node.h2
-rw-r--r--yql/essentials/sql/v1/source.cpp8
-rw-r--r--yql/essentials/sql/v1/sql_ut_common.h34
5 files changed, 63 insertions, 11 deletions
diff --git a/yql/essentials/sql/v1/aggregation.cpp b/yql/essentials/sql/v1/aggregation.cpp
index 130ec26d5f7..d7324012c13 100644
--- a/yql/essentials/sql/v1/aggregation.cpp
+++ b/yql/essentials/sql/v1/aggregation.cpp
@@ -714,8 +714,16 @@ public:
{}
private:
- const TString* GetGenericKey() const final {
- return Column_;
+ TMaybe<TString> GetGenericKey() const final {
+ if (!Column_) {
+ return Nothing();
+ }
+
+ TStringBuilder key;
+ if (Source_) {
+ key << *Source_ << ".";
+ }
+ return key << *Column_;
}
void Join(IAggregation* aggr) final {
@@ -736,7 +744,16 @@ private:
}
if (!isFactory) {
- Column_ = exprs.front()->GetColumnName();
+ Source_ = Nothing();
+ Column_ = Nothing();
+
+ const auto& expr = exprs.front();
+ if (const TString* source = expr->GetSourceName()) {
+ Source_ = *source;
+ }
+ if (const TString* column = expr->GetColumnName()) {
+ Column_ = *column;
+ }
}
if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front())))
@@ -828,7 +845,8 @@ private:
TSourcePtr FakeSource_;
std::multimap<TString, TNodePtr> Percentiles_;
TNodePtr FactoryPercentile_;
- const TString* Column_ = nullptr;
+ TMaybe<TString> Source_;
+ TMaybe<TString> Column_;
};
TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp
index 84395eb78cc..77acab8813d 100644
--- a/yql/essentials/sql/v1/node.cpp
+++ b/yql/essentials/sql/v1/node.cpp
@@ -1709,8 +1709,8 @@ void IAggregation::DoUpdateState() const {
State_.Set(ENodeState::OverWindowDistinct, AggMode_ == EAggregateMode::OverWindowDistinct);
}
-const TString* IAggregation::GetGenericKey() const {
- return nullptr;
+TMaybe<TString> IAggregation::GetGenericKey() const {
+ return Nothing();
}
void IAggregation::Join(IAggregation*) {
diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h
index b076d6df6d1..ea10c4a7e3e 100644
--- a/yql/essentials/sql/v1/node.h
+++ b/yql/essentials/sql/v1/node.h
@@ -974,7 +974,7 @@ namespace NSQLTranslationV1 {
void DoUpdateState() const override;
- virtual const TString* GetGenericKey() const;
+ virtual TMaybe<TString> GetGenericKey() const;
virtual bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) = 0;
diff --git a/yql/essentials/sql/v1/source.cpp b/yql/essentials/sql/v1/source.cpp
index 7b313adff13..94ef5c290fd 100644
--- a/yql/essentials/sql/v1/source.cpp
+++ b/yql/essentials/sql/v1/source.cpp
@@ -582,14 +582,14 @@ std::pair<TNodePtr, bool> ISource::BuildAggregation(const TString& label, TConte
std::map<std::pair<bool, TString>, std::vector<IAggregation*>> genericAggrs;
for (const auto& aggr: Aggregations_) {
- if (const auto key = aggr->GetGenericKey()) {
+ if (auto key = aggr->GetGenericKey()) {
genericAggrs[{aggr->IsDistinct(), *key}].emplace_back(aggr.Get());
}
}
- for (const auto& aggr : genericAggrs) {
- for (size_t i = 1U; i < aggr.second.size(); ++i) {
- aggr.second.front()->Join(aggr.second[i]);
+ for (const auto& [_, aggrs] : genericAggrs) {
+ for (size_t i = 1; i < aggrs.size(); ++i) {
+ aggrs.front()->Join(aggrs[i]);
}
}
diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h
index 89a1cf4c0d0..f0bc669ea34 100644
--- a/yql/essentials/sql/v1/sql_ut_common.h
+++ b/yql/essentials/sql/v1/sql_ut_common.h
@@ -8884,3 +8884,37 @@ Y_UNIT_TEST_SUITE(Crashes) {
UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
}
}
+
+Y_UNIT_TEST_SUITE(Aggregation) {
+
+ Y_UNIT_TEST(DeduplicationDistinctSources) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ SELECT Percentile(a.x, 0.50), Percentile(b.x, 0.75)
+ FROM plato.Input1 AS a
+ JOIN plato.Input1 AS b ON a.x == b.x;
+ )sql");
+
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+
+ TWordCountHive count = {{TString("percentile_traits_factory"), 0}};
+ VerifyProgram(res, count);
+
+ UNIT_ASSERT_VALUES_EQUAL(2, count["percentile_traits_factory"]);
+ }
+
+ Y_UNIT_TEST(DeduplicationSameSource) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ SELECT Percentile(a.x, 0.50), Percentile(a.x, 0.75)
+ FROM plato.Input1 AS a
+ JOIN plato.Input1 AS b ON a.x == b.x;
+ )sql");
+
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+
+ TWordCountHive count = {{TString("percentile_traits_factory"), 0}};
+ VerifyProgram(res, count);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, count["percentile_traits_factory"]);
+ }
+
+}