summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: kndrvt <[email protected]> 2025-05-27 12:21:32 +0300
committer: kndrvt <[email protected]> 2025-05-27 12:35:09 +0300
commit: 66037646e2c2d277e0406a6b23eca63b62357916 (patch)
tree: 3845fbb8be3a9b91aeaa892af793aafa013a7ff2
parent: 0c00e886d1f9ca57d86722925b12be1e125f878c (diff)
YQL-17269: add DISTINCT to UNION
commit_hash:d002e9690bd7cbd1874fdbfe454c9f7a00256839
-rw-r--r-- yql/essentials/sql/v1/SQLv1.g.in 2
-rw-r--r-- yql/essentials/sql/v1/SQLv1Antlr4.g.in 2
-rw-r--r-- yql/essentials/sql/v1/format/sql_format_ut.h 2
-rw-r--r-- yql/essentials/sql/v1/sql_select.cpp 30
-rw-r--r-- yql/essentials/sql/v1/sql_ut_common.h 9
-rw-r--r-- yql/essentials/tests/sql/minirun/part6/canondata/result.json 14
-rw-r--r-- yql/essentials/tests/sql/sql2yql/canondata/result.json 12
-rw-r--r-- yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql 8
-rw-r--r-- yql/essentials/tests/sql/suites/union/union_distinct.sql 7
9 files changed, 74 insertions, 12 deletions
diff --git a/yql/essentials/sql/v1/SQLv1.g.in b/yql/essentials/sql/v1/SQLv1.g.in
index 6ba91358316..ed04c337507 100644
--- a/yql/essentials/sql/v1/SQLv1.g.in
+++ b/yql/essentials/sql/v1/SQLv1.g.in
@@ -371,7 +371,7 @@ select_unparenthesized_stmt: select_kind_partial (select_op select_kind_parenthe
select_kind_parenthesis: select_kind_partial | LPAREN select_kind_partial RPAREN;
-select_op: UNION (ALL)? | INTERSECT | EXCEPT;
+select_op: (UNION | INTERSECT | EXCEPT) (DISTINCT | ALL)?;
select_kind_partial: select_kind
(LIMIT expr ((OFFSET | COMMA) expr)?)?
diff --git a/yql/essentials/sql/v1/SQLv1Antlr4.g.in b/yql/essentials/sql/v1/SQLv1Antlr4.g.in
index 66be65e2aee..6bfdf3820f7 100644
--- a/yql/essentials/sql/v1/SQLv1Antlr4.g.in
+++ b/yql/essentials/sql/v1/SQLv1Antlr4.g.in
@@ -370,7 +370,7 @@ select_unparenthesized_stmt: select_kind_partial (select_op select_kind_parenthe
select_kind_parenthesis: select_kind_partial | LPAREN select_kind_partial RPAREN;
-select_op: UNION (ALL)? | INTERSECT | EXCEPT;
+select_op: (UNION | INTERSECT | EXCEPT) (DISTINCT | ALL)?;
select_kind_partial: select_kind
(LIMIT expr ((OFFSET | COMMA) expr)?)?
diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h
index 8bb2af27939..0a9726aa64d 100644
--- a/yql/essentials/sql/v1/format/sql_format_ut.h
+++ b/yql/essentials/sql/v1/format/sql_format_ut.h
@@ -1534,6 +1534,8 @@ Y_UNIT_TEST(Union) {
"SELECT\n\t1\nUNION ALL\nSELECT\n\t2\nUNION\nSELECT\n\t3\nUNION ALL\nSELECT\n\t4\nUNION\nSELECT\n\t5\n;\n"},
{"select 1 union all (select 2)",
"SELECT\n\t1\nUNION ALL\n(\n\tSELECT\n\t\t2\n);\n"},
+ {"select 1 union distinct select 2 union select 3 union distinct select 4 union select 5",
+ "SELECT\n\t1\nUNION DISTINCT\nSELECT\n\t2\nUNION\nSELECT\n\t3\nUNION DISTINCT\nSELECT\n\t4\nUNION\nSELECT\n\t5\n;\n"},
};
TSetup setup;
diff --git a/yql/essentials/sql/v1/sql_select.cpp b/yql/essentials/sql/v1/sql_select.cpp
index 52dd399a990..8e11721c84f 100644
--- a/yql/essentials/sql/v1/sql_select.cpp
+++ b/yql/essentials/sql/v1/sql_select.cpp
@@ -1388,18 +1388,28 @@ TSourcePtr TSqlSelect::Build(const TRule& node, TPosition pos, TSelectKindResult
outermostSettings.Label = next.Settings.Label;
}
- switch (b.GetRule_select_op1().Alt_case()) {
- case TRule_select_op::kAltSelectOp1:
- break;
- case TRule_select_op::kAltSelectOp2:
- case TRule_select_op::kAltSelectOp3:
- Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet";
- return nullptr;
- case TRule_select_op::ALT_NOT_SET:
- Y_ABORT("You should change implementation according to grammar changes");
+ auto selectOp = b.GetRule_select_op1();
+ const TString token = ToLowerUTF8(Token(selectOp.GetToken1()));
+ if (token == "union") {
+ // nothing
+ } else if (token == "intersect" || token == "except") {
+ Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet";
+ return nullptr;
+ } else {
+ Y_ABORT("You should change implementation according to grammar changes. Invalid token: %s", token.c_str());
}
- const bool quantifier = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2();
+ bool quantifier = false;
+ if (selectOp.HasBlock2()) {
+ const TString token = ToLowerUTF8(Token(selectOp.GetBlock2().GetToken1()));
+ if (token == "all") {
+ quantifier = true;
+ } else if (token == "distinct") {
+ // nothing
+ } else {
+ Y_ABORT("You should change implementation according to grammar changes. Invalid token: %s", token.c_str());
+ }
+ }
if (!second && quantifier != currentQuantifier) {
auto source = BuildUnion(pos, std::move(sources), currentQuantifier, {});
diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h
index 15ef58b8d86..cead4e2799e 100644
--- a/yql/essentials/sql/v1/sql_ut_common.h
+++ b/yql/essentials/sql/v1/sql_ut_common.h
@@ -1714,6 +1714,15 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) {
UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Union"]);
}
+ Y_UNIT_TEST(UnionDistinctTest) { // Checks that a query using the explicit UNION DISTINCT form is accepted.
+ NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input UNION DISTINCT select subkey FROM plato.Input;");
+ UNIT_ASSERT(res.Root); // translation must produce a non-null root
+
+ TWordCountHive elementStat = {{TString("Union"), 0}}; // counter for "Union", seeded with 0
+ VerifyProgram(res, elementStat, {}); // presumably fills elementStat with occurrence counts from the program -- TODO confirm
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Union"]); // exactly one "Union" is expected for the single UNION DISTINCT
+ }
+
Y_UNIT_TEST(UnionAggregationTest) {
NYql::TAstParseResult res = SqlToYql(R"(
PRAGMA DisableEmitUnionMerge;
diff --git a/yql/essentials/tests/sql/minirun/part6/canondata/result.json b/yql/essentials/tests/sql/minirun/part6/canondata/result.json
index 74fb98a7cc1..b31a0804b64 100644
--- a/yql/essentials/tests/sql/minirun/part6/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part6/canondata/result.json
@@ -1449,6 +1449,20 @@
"uri": "https://{canondata_backend}/995452/27fc70b9589f65bcb911832f0d505bea9e66db7f/resource.tar.gz#test.test_udf-named_args--Results_/results.txt"
}
],
+ "test.test[union-union_distinct-default.txt-Debug]": [
+ {
+ "checksum": "4e68e4a9a59a12794bcae33aa5d639fa",
+ "size": 297,
+ "uri": "https://{canondata_backend}/1937492/e480305ecff3ccfe7cdc193e8c9dcc11c255e2c8/resource.tar.gz#test.test_union-union_distinct-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[union-union_distinct-default.txt-Results]": [
+ {
+ "checksum": "c5237daf2d6c78619bf209e9a39b7a12",
+ "size": 976,
+ "uri": "https://{canondata_backend}/1937492/e480305ecff3ccfe7cdc193e8c9dcc11c255e2c8/resource.tar.gz#test.test_union-union_distinct-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[window-rank/nulls_legacy-default.txt-Debug]": [
{
"checksum": "fe3da55ac0b8ab732b49a5d8760d80db",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 920aafb870e..d13181fa778 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -7286,6 +7286,13 @@
"uri": "https://{canondata_backend}/1775319/f1fa0c55bf9f13cff57cf1c990c2330caed8eb1b/resource.tar.gz#test_sql2yql.test_union-union_column_extention_/sql.yql"
}
],
+ "test_sql2yql.test[union-union_distinct]": [
+ {
+ "checksum": "fc0bf00619910dc8863d1e4982c07065",
+ "size": 1676,
+ "uri": "https://{canondata_backend}/1925842/1b6f19c4d4916f84ec5d150703910a73500e178a/resource.tar.gz#test_sql2yql.test_union-union_distinct_/sql.yql"
+ }
+ ],
"test_sql2yql.test[union-union_mix]": [
{
"checksum": "a4681f5145adcca6d4a4af7c5e164d73",
@@ -11253,6 +11260,11 @@
"uri": "file://test_sql_format.test_union-union_column_extention_/formatted.sql"
}
],
+ "test_sql_format.test[union-union_distinct]": [
+ {
+ "uri": "file://test_sql_format.test_union-union_distinct_/formatted.sql"
+ }
+ ],
"test_sql_format.test[union-union_mix]": [
{
"uri": "file://test_sql_format.test_union-union_mix_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql
new file mode 100644
index 00000000000..c3d0605b489
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_union-union_distinct_/formatted.sql
@@ -0,0 +1,8 @@
+SELECT
+ 1 AS x,
+ 2 AS y
+UNION DISTINCT
+SELECT
+ 1 AS x,
+ 2 AS y
+;
diff --git a/yql/essentials/tests/sql/suites/union/union_distinct.sql b/yql/essentials/tests/sql/suites/union/union_distinct.sql
new file mode 100644
index 00000000000..595d54c7284
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/union/union_distinct.sql
@@ -0,0 +1,7 @@
+SELECT
+ 1 as x,
+ 2 as y
+UNION DISTINCT
+SELECT
+ 1 as x,
+ 2 as y;