diff options
author | kndrvt <[email protected]> | 2025-05-27 12:21:32 +0300 |
---|---|---|
committer | kndrvt <[email protected]> | 2025-05-27 12:35:09 +0300 |
commit | 66037646e2c2d277e0406a6b23eca63b62357916 (patch) | |
tree | 3845fbb8be3a9b91aeaa892af793aafa013a7ff2 | |
parent | 0c00e886d1f9ca57d86722925b12be1e125f878c (diff) |
YQL-17269: add DISTINCT to UNION
commit_hash:d002e9690bd7cbd1874fdbfe454c9f7a00256839
9 files changed, 74 insertions, 12 deletions
diff --git a/yql/essentials/sql/v1/SQLv1.g.in b/yql/essentials/sql/v1/SQLv1.g.in index 6ba91358316..ed04c337507 100644 --- a/yql/essentials/sql/v1/SQLv1.g.in +++ b/yql/essentials/sql/v1/SQLv1.g.in @@ -371,7 +371,7 @@ select_unparenthesized_stmt: select_kind_partial (select_op select_kind_parenthe select_kind_parenthesis: select_kind_partial | LPAREN select_kind_partial RPAREN; -select_op: UNION (ALL)? | INTERSECT | EXCEPT; +select_op: (UNION | INTERSECT | EXCEPT) (DISTINCT | ALL)?; select_kind_partial: select_kind (LIMIT expr ((OFFSET | COMMA) expr)?)? diff --git a/yql/essentials/sql/v1/SQLv1Antlr4.g.in b/yql/essentials/sql/v1/SQLv1Antlr4.g.in index 66be65e2aee..6bfdf3820f7 100644 --- a/yql/essentials/sql/v1/SQLv1Antlr4.g.in +++ b/yql/essentials/sql/v1/SQLv1Antlr4.g.in @@ -370,7 +370,7 @@ select_unparenthesized_stmt: select_kind_partial (select_op select_kind_parenthe select_kind_parenthesis: select_kind_partial | LPAREN select_kind_partial RPAREN; -select_op: UNION (ALL)? | INTERSECT | EXCEPT; +select_op: (UNION | INTERSECT | EXCEPT) (DISTINCT | ALL)?; select_kind_partial: select_kind (LIMIT expr ((OFFSET | COMMA) expr)?)? diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h index 8bb2af27939..0a9726aa64d 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.h +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -1534,6 +1534,8 @@ Y_UNIT_TEST(Union) { "SELECT\n\t1\nUNION ALL\nSELECT\n\t2\nUNION\nSELECT\n\t3\nUNION ALL\nSELECT\n\t4\nUNION\nSELECT\n\t5\n;\n"}, {"select 1 union all (select 2)", "SELECT\n\t1\nUNION ALL\n(\n\tSELECT\n\t\t2\n);\n"}, + {"select 1 union distinct select 2 union select 3 union distinct select 4 union select 5", + "SELECT\n\t1\nUNION DISTINCT\nSELECT\n\t2\nUNION\nSELECT\n\t3\nUNION DISTINCT\nSELECT\n\t4\nUNION\nSELECT\n\t5\n;\n"}, }; TSetup setup; diff --git a/yql/essentials/sql/v1/sql_select.cpp b/yql/essentials/sql/v1/sql_select.cpp index 52dd399a990..8e11721c84f 100644 --- a/yql/essentials/sql/v1/sql_select.cpp +++ b/yql/essentials/sql/v1/sql_select.cpp @@ -1388,18 +1388,28 @@ TSourcePtr TSqlSelect::Build(const TRule& node, TPosition pos, TSelectKindResult outermostSettings.Label = next.Settings.Label; } - switch (b.GetRule_select_op1().Alt_case()) { - case TRule_select_op::kAltSelectOp1: - break; - case TRule_select_op::kAltSelectOp2: - case TRule_select_op::kAltSelectOp3: - Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet"; - return nullptr; - case TRule_select_op::ALT_NOT_SET: - Y_ABORT("You should change implementation according to grammar changes"); + auto selectOp = b.GetRule_select_op1(); + const TString token = ToLowerUTF8(Token(selectOp.GetToken1())); + if (token == "union") { + // nothing + } else if (token == "intersect" || token == "except") { + Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet"; + return nullptr; + } else { + Y_ABORT("You should change implementation according to grammar changes. Invalid token: %s", token.c_str()); } - const bool quantifier = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2(); + bool quantifier = false; + if (selectOp.HasBlock2()) { + const TString token = ToLowerUTF8(Token(selectOp.GetBlock2().GetToken1())); + if (token == "all") { + quantifier = true; + } else if (token == "distinct") { + // nothing + } else { + Y_ABORT("You should change implementation according to grammar changes. Invalid token: %s", token.c_str()); + } + } if (!second && quantifier != currentQuantifier) { auto source = BuildUnion(pos, std::move(sources), currentQuantifier, {}); diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h index 15ef58b8d86..cead4e2799e 100644 --- a/yql/essentials/sql/v1/sql_ut_common.h +++ b/yql/essentials/sql/v1/sql_ut_common.h @@ -1714,6 +1714,15 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Union"]); } + Y_UNIT_TEST(UnionDistinctTest) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input UNION DISTINCT select subkey FROM plato.Input;"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("Union"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Union"]); + } + Y_UNIT_TEST(UnionAggregationTest) { NYql::TAstParseResult res = SqlToYql(R"( PRAGMA DisableEmitUnionMerge; diff --git a/yql/essentials/tests/sql/minirun/part6/canondata/result.json b/yql/essentials/tests/sql/minirun/part6/canondata/result.json index 74fb98a7cc1..b31a0804b64 100644 --- a/yql/essentials/tests/sql/minirun/part6/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part6/canondata/result.json @@ -1449,6 +1449,20 @@ "uri": "https://{canondata_backend}/995452/27fc70b9589f65bcb911832f0d505bea9e66db7f/resource.tar.gz#test.test_udf-named_args--Results_/results.txt" } ], + "test.test[union-union_distinct-default.txt-Debug]": [ + { + "checksum": "4e68e4a9a59a12794bcae33aa5d639fa", + "size": 297, + "uri": "https://{canondata_backend}/1937492/e480305ecff3ccfe7cdc193e8c9dcc11c255e2c8/resource.tar.gz#test.test_union-union_distinct-default.txt-Debug_/opt.yql" + } + ], + "test.test[union-union_distinct-default.txt-Results]": [ + { + "checksum": "c5237daf2d6c78619bf209e9a39b7a12", + "size": 976, + "uri": "https://{canondata_backend}/1937492/e480305ecff3ccfe7cdc193e8c9dcc11c255e2c8/resource.tar.gz#test.test_union-union_distinct-default.txt-Results_/results.txt" + } + ], "test.test[window-rank/nulls_legacy-default.txt-Debug]": [ { "checksum": "fe3da55ac0b8ab732b49a5d8760d80db", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 920aafb870e..d13181fa778 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -7286,6 +7286,13 @@ "uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_union-union_column_extention_/sql.yql" } ], + "test_sql2yql.test[union-union_distinct]": [ + { + "checksum": "fc0bf00619910dc8863d1e4982c07065", + "size": 1676, + "uri": "https://{canondata_backend}/1925842/1b6f19c4d4916f84ec5d150703910a73500e178a/resource.tar.gz#test_sql2yql.test_union-union_distinct_/sql.yql" + } + ], "test_sql2yql.test[union-union_mix]": [ { "checksum": "a4681f5145adcca6d4a4af7c5e164d73", @@ -11253,6 +11260,11 @@ "uri": "file://test_sql_format.test_union-union_column_extention_/formatted.sql" } ], + "test_sql_format.test[union-union_distinct]": [ + { + "uri": "file://test_sql_format.test_union-union_distinct_/formatted.sql" + } + ], "test_sql_format.test[union-union_mix]": [ { "uri": "file://test_sql_format.test_union-union_mix_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql new file mode 100644 index 00000000000..c3d0605b489 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql @@ -0,0 +1,8 @@ +SELECT + 1 AS x, + 2 AS y +UNION DISTINCT +SELECT + 1 AS x, + 2 AS y +; diff --git a/yql/essentials/tests/sql/suites/union/union_distinct.sql b/yql/essentials/tests/sql/suites/union/union_distinct.sql new file mode 100644 index 00000000000..595d54c7284 --- /dev/null +++ b/yql/essentials/tests/sql/suites/union/union_distinct.sql @@ -0,0 +1,7 @@ +SELECT + 1 as x, + 2 as y +UNION DISTINCT +SELECT + 1 as x, + 2 as y; |