Merge pull request #11788 from ydb-platform/mergelibs-241120-1113

Library import 241120-1113
author: Maxim Yurchuk <maxim-yurchuk@ydb.tech> 2024-11-20 17:37:57 +0000
committer: GitHub <noreply@github.com> 2024-11-20 17:37:57 +0000
commit: f76323e9b295c15751e51e3443aa47a36bee8023 (patch)
tree: 4113c8cad473a33e0f746966e0cf087252fa1d7a /yql/essentials/tests/postgresql/original/cases/select_distinct.out
parent: 753ecb8d410a4cb459c26f3a0082fb2d1724fe63 (diff)
parent: a7b9a6afea2a9d7a7bfac4c5eb4c1a8e60adb9e6 (diff)
download: ydb-f76323e9b295c15751e51e3443aa47a36bee8023.tar.gz
1 file changed, 308 insertions, 0 deletions
diff --git a/yql/essentials/tests/postgresql/original/cases/select_distinct.out b/yql/essentials/tests/postgresql/original/cases/select_distinct.out
new file mode 100644
index 0000000000..11c6f50fbf
--- /dev/null
+++ b/yql/essentials/tests/postgresql/original/cases/select_distinct.out
@@ -0,0 +1,308 @@
+--
+-- SELECT_DISTINCT
+--
+--
+-- awk '{print $3;}' onek.data | sort -n | uniq
+--
+SELECT DISTINCT two FROM tmp ORDER BY 1;
+ two 
+-----
+   0
+   1
+(2 rows)
+
+--
+-- awk '{print $5;}' onek.data | sort -n | uniq
+--
+SELECT DISTINCT ten FROM tmp ORDER BY 1;
+ ten 
+-----
+   0
+   1
+   2
+   3
+   4
+   5
+   6
+   7
+   8
+   9
+(10 rows)
+
+--
+-- awk '{print $16;}' onek.data | sort -d | uniq
+--
+SELECT DISTINCT string4 FROM tmp ORDER BY 1;
+ string4 
+---------
+ AAAAxx
+ HHHHxx
+ OOOOxx
+ VVVVxx
+(4 rows)
+
+--
+-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
+-- sort +0n -1 +1d -2 +2n -3
+--
+SELECT DISTINCT two, string4, ten
+   FROM tmp
+   ORDER BY two using <, string4 using <, ten using <;
+ two | string4 | ten 
+-----+---------+-----
+   0 | AAAAxx  |   0
+   0 | AAAAxx  |   2
+   0 | AAAAxx  |   4
+   0 | AAAAxx  |   6
+   0 | AAAAxx  |   8
+   0 | HHHHxx  |   0
+   0 | HHHHxx  |   2
+   0 | HHHHxx  |   4
+   0 | HHHHxx  |   6
+   0 | HHHHxx  |   8
+   0 | OOOOxx  |   0
+   0 | OOOOxx  |   2
+   0 | OOOOxx  |   4
+   0 | OOOOxx  |   6
+   0 | OOOOxx  |   8
+   0 | VVVVxx  |   0
+   0 | VVVVxx  |   2
+   0 | VVVVxx  |   4
+   0 | VVVVxx  |   6
+   0 | VVVVxx  |   8
+   1 | AAAAxx  |   1
+   1 | AAAAxx  |   3
+   1 | AAAAxx  |   5
+   1 | AAAAxx  |   7
+   1 | AAAAxx  |   9
+   1 | HHHHxx  |   1
+   1 | HHHHxx  |   3
+   1 | HHHHxx  |   5
+   1 | HHHHxx  |   7
+   1 | HHHHxx  |   9
+   1 | OOOOxx  |   1
+   1 | OOOOxx  |   3
+   1 | OOOOxx  |   5
+   1 | OOOOxx  |   7
+   1 | OOOOxx  |   9
+   1 | VVVVxx  |   1
+   1 | VVVVxx  |   3
+   1 | VVVVxx  |   5
+   1 | VVVVxx  |   7
+   1 | VVVVxx  |   9
+(40 rows)
+
+--
+-- awk '{print $2;}' person.data |
+-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
+-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
+-- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
+-- sort -n -r | uniq
+--
+SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
+ age 
+-----
+  98
+  88
+  78
+  68
+  60
+  58
+  50
+  48
+  40
+  38
+  34
+  30
+  28
+  25
+  24
+  23
+  20
+  19
+  18
+   8
+(20 rows)
+
+--
+-- Check mentioning same column more than once
+--
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Aggregate
+   Output: count(*)
+   ->  HashAggregate
+         Output: tenk1.two, tenk1.four, tenk1.two
+         Group Key: tenk1.two, tenk1.four, tenk1.two
+         ->  Seq Scan on public.tenk1
+               Output: tenk1.two, tenk1.four, tenk1.two
+(7 rows)
+
+SELECT count(*) FROM
+  (SELECT DISTINCT two, four, two FROM tenk1) ss;
+ count 
+-------
+     4
+(1 row)
+
+--
+-- Compare results between plans using sorting and plans using hash
+-- aggregation. Force spilling in both cases by setting work_mem low.
+--
+SET work_mem='64kB';
+-- Produce results with sorting.
+SET enable_hashagg=FALSE;
+SET jit_above_cost=0;
+EXPLAIN (costs off)
+SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
+                   QUERY PLAN                   
+------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: ((g % 1000))
+         ->  Function Scan on generate_series g
+(4 rows)
+
+CREATE TABLE distinct_group_1 AS
+SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
+SET jit_above_cost TO DEFAULT;
+CREATE TABLE distinct_group_2 AS
+SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_hashagg=TRUE;
+-- Produce results with hash aggregation.
+SET enable_sort=FALSE;
+SET jit_above_cost=0;
+EXPLAIN (costs off)
+SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
+                QUERY PLAN                
+------------------------------------------
+ HashAggregate
+   Group Key: (g % 1000)
+   ->  Function Scan on generate_series g
+(3 rows)
+
+CREATE TABLE distinct_hash_1 AS
+SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
+SET jit_above_cost TO DEFAULT;
+CREATE TABLE distinct_hash_2 AS
+SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_sort=TRUE;
+SET work_mem TO DEFAULT;
+-- Compare results
+(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
+  UNION ALL
+(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
+ ?column? 
+----------
+(0 rows)
+
+(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
+  UNION ALL
+(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
+ ?column? 
+----------
+(0 rows)
+
+DROP TABLE distinct_hash_1;
+DROP TABLE distinct_hash_2;
+DROP TABLE distinct_group_1;
+DROP TABLE distinct_group_2;
+--
+-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
+-- very own regression file.
+--
+CREATE TEMP TABLE disttable (f1 integer);
+INSERT INTO DISTTABLE VALUES(1);
+INSERT INTO DISTTABLE VALUES(2);
+INSERT INTO DISTTABLE VALUES(3);
+INSERT INTO DISTTABLE VALUES(NULL);
+-- basic cases
+SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
+ f1 | not 2 
+----+-------
+  1 | t
+  2 | f
+  3 | t
+    | t
+(4 rows)
+
+SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
+ f1 | not null 
+----+----------
+  1 | t
+  2 | t
+  3 | t
+    | f
+(4 rows)
+
+SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
+ f1 | false 
+----+-------
+  1 | f
+  2 | f
+  3 | f
+    | f
+(4 rows)
+
+SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
+ f1 | not null 
+----+----------
+  1 | t
+  2 | t
+  3 | t
+    | f
+(4 rows)
+
+-- check that optimizer constant-folds it properly
+SELECT 1 IS DISTINCT FROM 2 as "yes";
+ yes 
+-----
+ t
+(1 row)
+
+SELECT 2 IS DISTINCT FROM 2 as "no";
+ no 
+----
+ f
+(1 row)
+
+SELECT 2 IS DISTINCT FROM null as "yes";
+ yes 
+-----
+ t
+(1 row)
+
+SELECT null IS DISTINCT FROM null as "no";
+ no 
+----
+ f
+(1 row)
+
+-- negated form
+SELECT 1 IS NOT DISTINCT FROM 2 as "no";
+ no 
+----
+ f
+(1 row)
+
+SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
+ yes 
+-----
+ t
+(1 row)
+
+SELECT 2 IS NOT DISTINCT FROM null as "no";
+ no 
+----
+ f
+(1 row)
+
+SELECT null IS NOT DISTINCT FROM null as "yes";
+ yes 
+-----
+ t
+(1 row)
+
author	Maxim Yurchuk <maxim-yurchuk@ydb.tech>	2024-11-20 17:37:57 +0000
committer	GitHub <noreply@github.com>	2024-11-20 17:37:57 +0000
commit	f76323e9b295c15751e51e3443aa47a36bee8023 (patch)
tree	4113c8cad473a33e0f746966e0cf087252fa1d7a /yql/essentials/tests/postgresql/original/cases/select_distinct.out
parent	753ecb8d410a4cb459c26f3a0082fb2d1724fe63 (diff)
parent	a7b9a6afea2a9d7a7bfac4c5eb4c1a8e60adb9e6 (diff)
download	ydb-f76323e9b295c15751e51e3443aa47a36bee8023.tar.gz