aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormpereskokova <mpereskokova@yandex-team.com>2025-06-03 15:52:10 +0300
committermpereskokova <mpereskokova@yandex-team.com>2025-06-03 17:18:41 +0300
commitf341c9126a1c66f49b841f6ef787ab18bc3d3bf5 (patch)
tree59f68f1118f69ec0fd34a3e086dd18a5a87d590a
parentb51b122d7876de2baef5405f3c7da3b10cf99b26 (diff)
downloadydb-f341c9126a1c66f49b841f6ef787ab18bc3d3bf5.tar.gz
Save distinct constraint for PruneKeys
commit_hash:c571530f0a74edc44a65da0388e8e4c543121044
-rw-r--r--yql/essentials/ast/yql_constraint.cpp1
-rw-r--r--yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp8
-rw-r--r--yql/essentials/core/ut/yql_expr_constraint_ut.cpp128
-rw-r--r--yql/essentials/core/yql_expr_constraint.cpp51
-rw-r--r--yql/essentials/core/yql_opt_utils.cpp6
-rw-r--r--yql/essentials/core/yql_opt_utils.h1
-rw-r--r--yql/essentials/tests/sql/minirun/part3/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json12
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql46
-rw-r--r--yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql22
10 files changed, 275 insertions, 14 deletions
diff --git a/yql/essentials/ast/yql_constraint.cpp b/yql/essentials/ast/yql_constraint.cpp
index 296d2c4f5e6..54ef1f44916 100644
--- a/yql/essentials/ast/yql_constraint.cpp
+++ b/yql/essentials/ast/yql_constraint.cpp
@@ -986,6 +986,7 @@ TUniqueConstraintNodeBase<Distinct>::DedupSets(TContentType&& sets) {
if (ot->size() < it->size() && std::all_of(ot->cbegin(), ot->cend(), [it](const TConstraintWithFieldsNode::TSetType& set) { return it->contains(set); })) {
it = sets.erase(it);
found = true;
+ break;
} else
++it;
}
diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
index 3f62f755562..0c9d875b95e 100644
--- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -2839,7 +2839,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx,
YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << input->Content();
if (type->GetKind() == ETypeAnnotationKind::List) {
- return ctx.Builder(input->Pos())
+ return KeepUniqueDistinct(ctx.Builder(input->Pos())
.Callable("CombineByKey")
.Add(0, input->HeadPtr())
.Lambda(1) // preMap
@@ -2853,7 +2853,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx,
.Add(4, updateHandler)
.Add(5, finishHandler)
.Seal()
- .Build();
+ .Build(), *input, ctx);
} else {
// Slight copy of GetDictionaryKeyTypes to check if keyExtractorLambda result type is complicated
// mkql CombineCore supports only simple types; for others we should add pickling
@@ -2888,7 +2888,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx,
.Build();
}
- return ctx.Builder(input->Pos())
+ return KeepUniqueDistinct(ctx.Builder(input->Pos())
.Callable("CombineCore")
.Add(0, input->HeadPtr())
.Add(1, keyExtractorLambda)
@@ -2897,7 +2897,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx,
.Add(4, finishHandler)
.Atom(5, ToString(typesCtx.PruneKeysMemLimit))
.Seal()
- .Build();
+ .Build(), *input, ctx);
}
}
diff --git a/yql/essentials/core/ut/yql_expr_constraint_ut.cpp b/yql/essentials/core/ut/yql_expr_constraint_ut.cpp
index a4145b9c5a8..cf5787887b9 100644
--- a/yql/essentials/core/ut/yql_expr_constraint_ut.cpp
+++ b/yql/essentials/core/ut/yql_expr_constraint_ut.cpp
@@ -71,6 +71,134 @@ Y_UNIT_TEST_SUITE(TYqlExprConstraints) {
}
}
+ Y_UNIT_TEST(PruneAdjacentKeysAddUniqueDistinct) {
+ const auto s = R"((
+ (let res (DataSink 'result))
+ (let list (AsList
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3")))
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4")))
+ (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"3")))
+ ))
+ (let pruned (PruneAdjacentKeys list (lambda '(item) (Member item '"a"))))
+ (let world (Write! world res (Key) pruned '()))
+ (let world (Commit! world res))
+ (return world)
+ ))";
+
+ TExprContext exprCtx;
+ const auto exprRoot = ParseAndAnnotate(s, exprCtx);
+ CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a))");
+ CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a))");
+ }
+
+ Y_UNIT_TEST(PruneAdjacentKeysAddUniqueDistinctForAlreadyUniqueDistinct) {
+ const auto s = R"((
+ (let res (DataSink 'result))
+ (let list (AsList
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3")))
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4")))
+ (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5")))
+ ))
+ (let list (AssumeUnique list '('b)))
+ (let list (AssumeDistinct list '('b)))
+ (let pruned (PruneAdjacentKeys list (lambda '(item) (Member item '"a"))))
+ (let world (Write! world res (Key) pruned '()))
+ (let world (Commit! world res))
+ (return world)
+ ))";
+
+ TExprContext exprCtx;
+ const auto exprRoot = ParseAndAnnotate(s, exprCtx);
+ CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a)(b))");
+ CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a)(b))");
+ }
+
+ Y_UNIT_TEST(PruneAdjacentKeysAddUniqueDistinctForAlreadyUniqueDistinctYetAnother) {
+ const auto s = R"((
+ (let res (DataSink 'result))
+ (let list (AsList
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3")))
+ (AsStruct '('"a" (Int32 '"0")) '('"b" (Int32 '"4")))
+ (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5")))
+ ))
+ (let list (AssumeUnique list '('a 'b)))
+ (let list (AssumeDistinct list '('a 'b)))
+ (let pruned (PruneAdjacentKeys list (lambda '(item) (Member item '"a"))))
+ (let world (Write! world res (Key) pruned '()))
+ (let world (Commit! world res))
+ (return world)
+ ))";
+
+ TExprContext exprCtx;
+ const auto exprRoot = ParseAndAnnotate(s, exprCtx);
+ CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a))");
+ CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a))");
+ }
+
+ Y_UNIT_TEST(PruneAdjacentKeysForTupleAddUniqueDistinct) {
+ const auto s = R"((
+ (let res (DataSink 'result))
+ (let list (AsList
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3")))
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4")))
+ (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"3")))
+ ))
+ (let pruned (PruneAdjacentKeys list (lambda '(item) '((Member item '"a") (Member item '"b")))))
+ (let world (Write! world res (Key) pruned '()))
+ (let world (Commit! world res))
+ (return world)
+ ))";
+
+ TExprContext exprCtx;
+ const auto exprRoot = ParseAndAnnotate(s, exprCtx);
+ CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a,b))");
+ CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a,b))");
+ }
+
+ Y_UNIT_TEST(PruneAdjacentKeysForTupleAddUniqueDistinctForAlreadyUniqueDistinct) {
+ const auto s = R"((
+ (let res (DataSink 'result))
+ (let list (AsList
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3")))
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4")))
+ (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5")))
+ ))
+ (let list (AssumeUnique list '('b)))
+ (let list (AssumeDistinct list '('b)))
+ (let pruned (PruneAdjacentKeys list (lambda '(item) '((Member item '"a") (Member item '"b")))))
+ (let world (Write! world res (Key) pruned '()))
+ (let world (Commit! world res))
+ (return world)
+ ))";
+
+ TExprContext exprCtx;
+ const auto exprRoot = ParseAndAnnotate(s, exprCtx);
+ CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((b))");
+ CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((b))");
+ }
+
+ Y_UNIT_TEST(PruneAdjacentKeysForTupleAddUniqueDistinctForAlreadyUniqueDistinctYetAnother) {
+ const auto s = R"((
+ (let res (DataSink 'result))
+ (let list (AsList
+ (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3")))
+ (AsStruct '('"a" (Int32 '"0")) '('"b" (Int32 '"4")))
+ (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5")))
+ ))
+ (let list (AssumeUnique list '('a 'b)))
+ (let list (AssumeDistinct list '('a 'b)))
+ (let pruned (PruneAdjacentKeys list (lambda '(item) '((Member item '"a") (Member item '"b")))))
+ (let world (Write! world res (Key) pruned '()))
+ (let world (Commit! world res))
+ (return world)
+ ))";
+
+ TExprContext exprCtx;
+ const auto exprRoot = ParseAndAnnotate(s, exprCtx);
+ CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a,b))");
+ CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a,b))");
+ }
+
Y_UNIT_TEST(Sort) {
const auto s = R"((
(let res (DataSink 'result))
diff --git a/yql/essentials/core/yql_expr_constraint.cpp b/yql/essentials/core/yql_expr_constraint.cpp
index 7fcf560a2cf..dede320a721 100644
--- a/yql/essentials/core/yql_expr_constraint.cpp
+++ b/yql/essentials/core/yql_expr_constraint.cpp
@@ -822,23 +822,54 @@ private:
}
if constexpr (Adjacent) {
- if (const auto status = CopyAllFrom<0>(input, output, ctx); status != TStatus::Ok) {
- return status;
- }
+ TSet<TStringBuf> except = {
+ TUniqueConstraintNode::Name(),
+ TDistinctConstraintNode::Name(),
+ };
+ CopyExcept(*input, *input->Child(0), except);
+
+ auto uniqueConstraint = input->Child(0)->GetConstraint<TUniqueConstraintNode>();
+ auto distinctConstraint = input->Child(0)->GetConstraint<TDistinctConstraintNode>();
TPartOfConstraintBase::TSetType keys = GetPathsToKeys<true>(input->Child(1)->Tail(), input->Child(1)->Head().Head());
- TPartOfConstraintBase::TSetOfSetsType uniqueKeys;
- for (const auto& elem : keys) {
- uniqueKeys.insert(TPartOfConstraintBase::TSetType{elem});
- }
if (!keys.empty()) {
- input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(TUniqueConstraintNode::TContentType{uniqueKeys}));
- input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(TDistinctConstraintNode::TContentType{uniqueKeys}));
+ TPartOfConstraintBase::TSetOfSetsType uniqueKeys;
+ for (const auto& elem : keys) {
+ uniqueKeys.insert(TPartOfConstraintBase::TSetType{elem});
+ }
+
+ auto newUniqueConstraint = ctx.MakeConstraint<TUniqueConstraintNode>(TUniqueConstraintNode::TContentType{uniqueKeys});
+ if (uniqueConstraint) {
+ uniqueConstraint = TUniqueConstraintNode::Merge(
+ newUniqueConstraint,
+ dynamic_cast<const TUniqueConstraintNode*>(uniqueConstraint),
+ ctx);
+ } else {
+ uniqueConstraint = newUniqueConstraint;
+ }
+
+ auto newDistinctConstraint = ctx.MakeConstraint<TDistinctConstraintNode>(TDistinctConstraintNode::TContentType{uniqueKeys});
+ if (distinctConstraint) {
+ distinctConstraint = TDistinctConstraintNode::Merge(
+ newDistinctConstraint,
+ dynamic_cast<const TDistinctConstraintNode*>(distinctConstraint),
+ ctx);
+ } else {
+ distinctConstraint = newDistinctConstraint;
+ }
+ }
+
+ if (uniqueConstraint) {
+ input->AddConstraint(uniqueConstraint);
+ }
+ if (distinctConstraint) {
+ input->AddConstraint(distinctConstraint);
}
+
return TStatus::Ok;
}
- return FromFirst<TEmptyConstraintNode>(input, output, ctx);
+ return FromFirst<TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode>(input, output, ctx);
}
template<class TConstraint>
diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp
index 0b956ea5de6..9c40079c224 100644
--- a/yql/essentials/core/yql_opt_utils.cpp
+++ b/yql/essentials/core/yql_opt_utils.cpp
@@ -2039,6 +2039,12 @@ TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExp
return res;
}
+TExprNode::TPtr KeepUniqueDistinct(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) {
+ auto res = KeepUniqueConstraint<true>(node, src, ctx);
+ res = KeepUniqueConstraint<false>(std::move(res), src, ctx);
+ return res;
+}
+
bool HasOnlyOneJoinType(const TExprNode& joinTree, TStringBuf joinType) {
if (joinTree.IsAtom()) {
return true;
diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h
index 447d53737d1..3e5b2090157 100644
--- a/yql/essentials/core/yql_opt_utils.h
+++ b/yql/essentials/core/yql_opt_utils.h
@@ -153,6 +153,7 @@ bool HasDependsOn(const TExprNode::TPtr& node, const TExprNode::TPtr& arg);
TExprNode::TPtr KeepSortedConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx);
TExprNode::TPtr MakeSortByConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx);
TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx);
+TExprNode::TPtr KeepUniqueDistinct(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx);
bool HasMissingWorlds(const TExprNode::TPtr& node, const TExprNode& src, const TTypeAnnotationContext& types);
TExprNode::TPtr KeepWorld(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& types);
diff --git a/yql/essentials/tests/sql/minirun/part3/canondata/result.json b/yql/essentials/tests/sql/minirun/part3/canondata/result.json
index 0771058e6e6..baf4cd34051 100644
--- a/yql/essentials/tests/sql/minirun/part3/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part3/canondata/result.json
@@ -718,6 +718,20 @@
"uri": "https://{canondata_backend}/1936273/7fdc6447866dad56167cedfaa870756d81f351c4/resource.tar.gz#test.test_join-convert_check_key_mem-default.txt-Results_/results.txt"
}
],
+ "test.test[join-prune_keys_YQL-19979-default.txt-Debug]": [
+ {
+ "checksum": "b96a18e5e334ac232f7c5ccebb12f7d8",
+ "size": 1066,
+ "uri": "https://{canondata_backend}/1881367/b41bcb116da4fb485fcfebc0eabbd4996673c557/resource.tar.gz#test.test_join-prune_keys_YQL-19979-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-prune_keys_YQL-19979-default.txt-Results]": [
+ {
+ "checksum": "859af0b132df00e7b9fe76b8d6902a89",
+ "size": 764,
+ "uri": "https://{canondata_backend}/1881367/b41bcb116da4fb485fcfebc0eabbd4996673c557/resource.tar.gz#test.test_join-prune_keys_YQL-19979-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[json-json_value/passing-default.txt-Debug]": [
{
"checksum": "8dc638061f1f8f0d78d93b6c0e79b845",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index d13181fa778..8524b042474 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -3982,6 +3982,13 @@
"uri": "https://{canondata_backend}/1599023/2a161150407124ac83c2566a6542b19a53abfccb/resource.tar.gz#test_sql2yql.test_join-prune_keys_/sql.yql"
}
],
+ "test_sql2yql.test[join-prune_keys_YQL-19979]": [
+ {
+ "checksum": "0dad5d395f90148805e893a30f0b4963",
+ "size": 3845,
+ "uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql"
+ }
+ ],
"test_sql2yql.test[join-yql-19192]": [
{
"checksum": "fffdf1cbb40643da9daf9bdf3edec121",
@@ -10355,6 +10362,11 @@
"uri": "file://test_sql_format.test_join-prune_keys_/formatted.sql"
}
],
+ "test_sql_format.test[join-prune_keys_YQL-19979]": [
+ {
+ "uri": "file://test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql"
+ }
+ ],
"test_sql_format.test[join-yql-19192]": [
{
"uri": "file://test_sql_format.test_join-yql-19192_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql
new file mode 100644
index 00000000000..9b5ecce4604
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql
@@ -0,0 +1,46 @@
+PRAGMA config.flags('OptimizerFlags', 'EmitPruneKeys');
+
+$a = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|x: 1, t: 1|>,
+ <|x: 1, t: 1|>,
+ <|x: 1, t: 2|>,
+ <|x: 3, t: 1|>,
+ <|x: 3, t: 4|>,
+ <|x: 3, t: 2|>,
+ ])
+);
+
+$b = (
+ SELECT
+ *
+ FROM
+ as_table([
+ <|x: 1, y: 1|>,
+ <|x: 1, y: 2|>,
+ <|x: 1, y: 3|>,
+ <|x: 1, y: 3|>,
+ <|x: 2, y: 3|>,
+ <|x: 2, y: 4|>,
+ ])
+);
+
+$distinct_a = (
+ SELECT DISTINCT
+ *
+ FROM
+ $a
+);
+
+SELECT
+ a.x
+FROM ANY
+ $distinct_a AS a
+JOIN ANY
+ $b AS b
+ON
+ a.x == b.y
+;
diff --git a/yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql b/yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql
new file mode 100644
index 00000000000..0b93ada1906
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql
@@ -0,0 +1,22 @@
+pragma config.flags('OptimizerFlags', 'EmitPruneKeys');
+
+$a = select * from as_table([
+ <|x:1, t:1|>,
+ <|x:1, t:1|>,
+ <|x:1, t:2|>,
+ <|x:3, t:1|>,
+ <|x:3, t:4|>,
+ <|x:3, t:2|>,
+ ]);
+
+$b = select * from as_table([
+ <|x:1, y:1|>,
+ <|x:1, y:2|>,
+ <|x:1, y:3|>,
+ <|x:1, y:3|>,
+ <|x:2, y:3|>,
+ <|x:2, y:4|>,
+ ]);
+
+$distinct_a = (select distinct * from $a);
+select a.x from any $distinct_a as a join any $b as b on a.x == b.y;