diff options
author | mpereskokova <mpereskokova@yandex-team.com> | 2025-06-03 15:52:10 +0300 |
---|---|---|
committer | mpereskokova <mpereskokova@yandex-team.com> | 2025-06-03 17:18:41 +0300 |
commit | f341c9126a1c66f49b841f6ef787ab18bc3d3bf5 (patch) | |
tree | 59f68f1118f69ec0fd34a3e086dd18a5a87d590a | |
parent | b51b122d7876de2baef5405f3c7da3b10cf99b26 (diff) | |
download | ydb-f341c9126a1c66f49b841f6ef787ab18bc3d3bf5.tar.gz |
Save distinct constraint for PruneKeys
commit_hash:c571530f0a74edc44a65da0388e8e4c543121044
10 files changed, 275 insertions, 14 deletions
diff --git a/yql/essentials/ast/yql_constraint.cpp b/yql/essentials/ast/yql_constraint.cpp index 296d2c4f5e6..54ef1f44916 100644 --- a/yql/essentials/ast/yql_constraint.cpp +++ b/yql/essentials/ast/yql_constraint.cpp @@ -986,6 +986,7 @@ TUniqueConstraintNodeBase<Distinct>::DedupSets(TContentType&& sets) { if (ot->size() < it->size() && std::all_of(ot->cbegin(), ot->cend(), [it](const TConstraintWithFieldsNode::TSetType& set) { return it->contains(set); })) { it = sets.erase(it); found = true; + break; } else ++it; } diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp index 3f62f755562..0c9d875b95e 100644 --- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -2839,7 +2839,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx, YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << input->Content(); if (type->GetKind() == ETypeAnnotationKind::List) { - return ctx.Builder(input->Pos()) + return KeepUniqueDistinct(ctx.Builder(input->Pos()) .Callable("CombineByKey") .Add(0, input->HeadPtr()) .Lambda(1) // preMap @@ -2853,7 +2853,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx, .Add(4, updateHandler) .Add(5, finishHandler) .Seal() - .Build(); + .Build(), *input, ctx); } else { // Slight copy of GetDictionaryKeyTypes to check if keyExtractorLambda result type is complicated // mkql CombineCore supports only simple types; for others we should add pickling @@ -2888,7 +2888,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx, .Build(); } - return ctx.Builder(input->Pos()) + return KeepUniqueDistinct(ctx.Builder(input->Pos()) .Callable("CombineCore") .Add(0, input->HeadPtr()) .Add(1, keyExtractorLambda) @@ -2897,7 +2897,7 @@ TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx, .Add(4, finishHandler) .Atom(5, ToString(typesCtx.PruneKeysMemLimit)) .Seal() - .Build(); + .Build(), *input, ctx); } } diff --git a/yql/essentials/core/ut/yql_expr_constraint_ut.cpp b/yql/essentials/core/ut/yql_expr_constraint_ut.cpp index a4145b9c5a8..cf5787887b9 100644 --- a/yql/essentials/core/ut/yql_expr_constraint_ut.cpp +++ b/yql/essentials/core/ut/yql_expr_constraint_ut.cpp @@ -71,6 +71,134 @@ Y_UNIT_TEST_SUITE(TYqlExprConstraints) { } } + Y_UNIT_TEST(PruneAdjacentKeysAddUniqueDistinct) { + const auto s = R"(( + (let res (DataSink 'result)) + (let list (AsList + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3"))) + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4"))) + (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"3"))) + )) + (let pruned (PruneAdjacentKeys list (lambda '(item) (Member item '"a")))) + (let world (Write! world res (Key) pruned '())) + (let world (Commit! world res)) + (return world) + ))"; + + TExprContext exprCtx; + const auto exprRoot = ParseAndAnnotate(s, exprCtx); + CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a))"); + CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a))"); + } + + Y_UNIT_TEST(PruneAdjacentKeysAddUniqueDistinctForAlreadyUniqueDistinct) { + const auto s = R"(( + (let res (DataSink 'result)) + (let list (AsList + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3"))) + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4"))) + (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5"))) + )) + (let list (AssumeUnique list '('b))) + (let list (AssumeDistinct list '('b))) + (let pruned (PruneAdjacentKeys list (lambda '(item) (Member item '"a")))) + (let world (Write! world res (Key) pruned '())) + (let world (Commit! world res)) + (return world) + ))"; + + TExprContext exprCtx; + const auto exprRoot = ParseAndAnnotate(s, exprCtx); + CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a)(b))"); + CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a)(b))"); + } + + Y_UNIT_TEST(PruneAdjacentKeysAddUniqueDistinctForAlreadyUniqueDistinctYetAnother) { + const auto s = R"(( + (let res (DataSink 'result)) + (let list (AsList + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3"))) + (AsStruct '('"a" (Int32 '"0")) '('"b" (Int32 '"4"))) + (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5"))) + )) + (let list (AssumeUnique list '('a 'b))) + (let list (AssumeDistinct list '('a 'b))) + (let pruned (PruneAdjacentKeys list (lambda '(item) (Member item '"a")))) + (let world (Write! world res (Key) pruned '())) + (let world (Commit! world res)) + (return world) + ))"; + + TExprContext exprCtx; + const auto exprRoot = ParseAndAnnotate(s, exprCtx); + CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a))"); + CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a))"); + } + + Y_UNIT_TEST(PruneAdjacentKeysForTupleAddUniqueDistinct) { + const auto s = R"(( + (let res (DataSink 'result)) + (let list (AsList + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3"))) + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4"))) + (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"3"))) + )) + (let pruned (PruneAdjacentKeys list (lambda '(item) '((Member item '"a") (Member item '"b"))))) + (let world (Write! world res (Key) pruned '())) + (let world (Commit! world res)) + (return world) + ))"; + + TExprContext exprCtx; + const auto exprRoot = ParseAndAnnotate(s, exprCtx); + CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a,b))"); + CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a,b))"); + } + + Y_UNIT_TEST(PruneAdjacentKeysForTupleAddUniqueDistinctForAlreadyUniqueDistinct) { + const auto s = R"(( + (let res (DataSink 'result)) + (let list (AsList + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3"))) + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"4"))) + (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5"))) + )) + (let list (AssumeUnique list '('b))) + (let list (AssumeDistinct list '('b))) + (let pruned (PruneAdjacentKeys list (lambda '(item) '((Member item '"a") (Member item '"b"))))) + (let world (Write! world res (Key) pruned '())) + (let world (Commit! world res)) + (return world) + ))"; + + TExprContext exprCtx; + const auto exprRoot = ParseAndAnnotate(s, exprCtx); + CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((b))"); + CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((b))"); + } + + Y_UNIT_TEST(PruneAdjacentKeysForTupleAddUniqueDistinctForAlreadyUniqueDistinctYetAnother) { + const auto s = R"(( + (let res (DataSink 'result)) + (let list (AsList + (AsStruct '('"a" (Nothing (OptionalType (DataType 'Int32)))) '('"b" (Int32 '"3"))) + (AsStruct '('"a" (Int32 '"0")) '('"b" (Int32 '"4"))) + (AsStruct '('"a" (Int32 '"1")) '('"b" (Int32 '"5"))) + )) + (let list (AssumeUnique list '('a 'b))) + (let list (AssumeDistinct list '('a 'b))) + (let pruned (PruneAdjacentKeys list (lambda '(item) '((Member item '"a") (Member item '"b"))))) + (let world (Write! world res (Key) pruned '())) + (let world (Commit! world res)) + (return world) + ))"; + + TExprContext exprCtx; + const auto exprRoot = ParseAndAnnotate(s, exprCtx); + CheckConstraint<TUniqueConstraintNode>(exprRoot, "PruneAdjacentKeys", "Unique((a,b))"); + CheckConstraint<TDistinctConstraintNode>(exprRoot, "PruneAdjacentKeys", "Distinct((a,b))"); + } + Y_UNIT_TEST(Sort) { const auto s = R"(( (let res (DataSink 'result)) diff --git a/yql/essentials/core/yql_expr_constraint.cpp b/yql/essentials/core/yql_expr_constraint.cpp index 7fcf560a2cf..dede320a721 100644 --- a/yql/essentials/core/yql_expr_constraint.cpp +++ b/yql/essentials/core/yql_expr_constraint.cpp @@ -822,23 +822,54 @@ private: } if constexpr (Adjacent) { - if (const auto status = CopyAllFrom<0>(input, output, ctx); status != TStatus::Ok) { - return status; - } + TSet<TStringBuf> except = { + TUniqueConstraintNode::Name(), + TDistinctConstraintNode::Name(), + }; + CopyExcept(*input, *input->Child(0), except); + + auto uniqueConstraint = input->Child(0)->GetConstraint<TUniqueConstraintNode>(); + auto distinctConstraint = input->Child(0)->GetConstraint<TDistinctConstraintNode>(); TPartOfConstraintBase::TSetType keys = GetPathsToKeys<true>(input->Child(1)->Tail(), input->Child(1)->Head().Head()); - TPartOfConstraintBase::TSetOfSetsType uniqueKeys; - for (const auto& elem : keys) { - uniqueKeys.insert(TPartOfConstraintBase::TSetType{elem}); - } if (!keys.empty()) { - input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(TUniqueConstraintNode::TContentType{uniqueKeys})); - input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(TDistinctConstraintNode::TContentType{uniqueKeys})); + TPartOfConstraintBase::TSetOfSetsType uniqueKeys; + for (const auto& elem : keys) { + uniqueKeys.insert(TPartOfConstraintBase::TSetType{elem}); + } + + auto newUniqueConstraint = ctx.MakeConstraint<TUniqueConstraintNode>(TUniqueConstraintNode::TContentType{uniqueKeys}); + if (uniqueConstraint) { + uniqueConstraint = TUniqueConstraintNode::Merge( + newUniqueConstraint, + dynamic_cast<const TUniqueConstraintNode*>(uniqueConstraint), + ctx); + } else { + uniqueConstraint = newUniqueConstraint; + } + + auto newDistinctConstraint = ctx.MakeConstraint<TDistinctConstraintNode>(TDistinctConstraintNode::TContentType{uniqueKeys}); + if (distinctConstraint) { + distinctConstraint = TDistinctConstraintNode::Merge( + newDistinctConstraint, + dynamic_cast<const TDistinctConstraintNode*>(distinctConstraint), + ctx); + } else { + distinctConstraint = newDistinctConstraint; + } + } + + if (uniqueConstraint) { + input->AddConstraint(uniqueConstraint); + } + if (distinctConstraint) { + input->AddConstraint(distinctConstraint); } + return TStatus::Ok; } - return FromFirst<TEmptyConstraintNode>(input, output, ctx); + return FromFirst<TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode>(input, output, ctx); } template<class TConstraint> diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp index 0b956ea5de6..9c40079c224 100644 --- a/yql/essentials/core/yql_opt_utils.cpp +++ b/yql/essentials/core/yql_opt_utils.cpp @@ -2039,6 +2039,12 @@ TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExp return res; } +TExprNode::TPtr KeepUniqueDistinct(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) { + auto res = KeepUniqueConstraint<true>(node, src, ctx); + res = KeepUniqueConstraint<false>(std::move(res), src, ctx); + return res; +} + bool HasOnlyOneJoinType(const TExprNode& joinTree, TStringBuf joinType) { if (joinTree.IsAtom()) { return true; diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 447d53737d1..3e5b2090157 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -153,6 +153,7 @@ bool HasDependsOn(const TExprNode::TPtr& node, const TExprNode::TPtr& arg); TExprNode::TPtr KeepSortedConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx); TExprNode::TPtr MakeSortByConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx); TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx); +TExprNode::TPtr KeepUniqueDistinct(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx); bool HasMissingWorlds(const TExprNode::TPtr& node, const TExprNode& src, const TTypeAnnotationContext& types); TExprNode::TPtr KeepWorld(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& types); diff --git a/yql/essentials/tests/sql/minirun/part3/canondata/result.json b/yql/essentials/tests/sql/minirun/part3/canondata/result.json index 0771058e6e6..baf4cd34051 100644 --- a/yql/essentials/tests/sql/minirun/part3/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part3/canondata/result.json @@ -718,6 +718,20 @@ "uri": "https://{canondata_backend}/1936273/7fdc6447866dad56167cedfaa870756d81f351c4/resource.tar.gz#test.test_join-convert_check_key_mem-default.txt-Results_/results.txt" } ], + "test.test[join-prune_keys_YQL-19979-default.txt-Debug]": [ + { + "checksum": "b96a18e5e334ac232f7c5ccebb12f7d8", + "size": 1066, + "uri": "https://{canondata_backend}/1881367/b41bcb116da4fb485fcfebc0eabbd4996673c557/resource.tar.gz#test.test_join-prune_keys_YQL-19979-default.txt-Debug_/opt.yql" + } + ], + "test.test[join-prune_keys_YQL-19979-default.txt-Results]": [ + { + "checksum": "859af0b132df00e7b9fe76b8d6902a89", + "size": 764, + "uri": "https://{canondata_backend}/1881367/b41bcb116da4fb485fcfebc0eabbd4996673c557/resource.tar.gz#test.test_join-prune_keys_YQL-19979-default.txt-Results_/results.txt" + } + ], "test.test[json-json_value/passing-default.txt-Debug]": [ { "checksum": "8dc638061f1f8f0d78d93b6c0e79b845", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index d13181fa778..8524b042474 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -3982,6 +3982,13 @@ "uri": "https://{canondata_backend}/1599023/2a161150407124ac83c2566a6542b19a53abfccb/resource.tar.gz#test_sql2yql.test_join-prune_keys_/sql.yql" } ], + "test_sql2yql.test[join-prune_keys_YQL-19979]": [ + { + "checksum": "0dad5d395f90148805e893a30f0b4963", + "size": 3845, + "uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql" + } + ], "test_sql2yql.test[join-yql-19192]": [ { "checksum": "fffdf1cbb40643da9daf9bdf3edec121", @@ -10355,6 +10362,11 @@ "uri": "file://test_sql_format.test_join-prune_keys_/formatted.sql" } ], + "test_sql_format.test[join-prune_keys_YQL-19979]": [ + { + "uri": "file://test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql" + } + ], "test_sql_format.test[join-yql-19192]": [ { "uri": "file://test_sql_format.test_join-yql-19192_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql new file mode 100644 index 00000000000..9b5ecce4604 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql @@ -0,0 +1,46 @@ +PRAGMA config.flags('OptimizerFlags', 'EmitPruneKeys'); + +$a = ( + SELECT + * + FROM + as_table([ + <|x: 1, t: 1|>, + <|x: 1, t: 1|>, + <|x: 1, t: 2|>, + <|x: 3, t: 1|>, + <|x: 3, t: 4|>, + <|x: 3, t: 2|>, + ]) +); + +$b = ( + SELECT + * + FROM + as_table([ + <|x: 1, y: 1|>, + <|x: 1, y: 2|>, + <|x: 1, y: 3|>, + <|x: 1, y: 3|>, + <|x: 2, y: 3|>, + <|x: 2, y: 4|>, + ]) +); + +$distinct_a = ( + SELECT DISTINCT + * + FROM + $a +); + +SELECT + a.x +FROM ANY + $distinct_a AS a +JOIN ANY + $b AS b +ON + a.x == b.y +; diff --git a/yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql b/yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql new file mode 100644 index 00000000000..0b93ada1906 --- /dev/null +++ b/yql/essentials/tests/sql/suites/join/prune_keys_YQL-19979.sql @@ -0,0 +1,22 @@ +pragma config.flags('OptimizerFlags', 'EmitPruneKeys'); + +$a = select * from as_table([ + <|x:1, t:1|>, + <|x:1, t:1|>, + <|x:1, t:2|>, + <|x:3, t:1|>, + <|x:3, t:4|>, + <|x:3, t:2|>, + ]); + +$b = select * from as_table([ + <|x:1, y:1|>, + <|x:1, y:2|>, + <|x:1, y:3|>, + <|x:1, y:3|>, + <|x:2, y:3|>, + <|x:2, y:4|>, + ]); + +$distinct_a = (select distinct * from $a); +select a.x from any $distinct_a as a join any $b as b on a.x == b.y; |