diff options
author | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2024-11-20 17:37:57 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-20 17:37:57 +0000 |
commit | f76323e9b295c15751e51e3443aa47a36bee8023 (patch) | |
tree | 4113c8cad473a33e0f746966e0cf087252fa1d7a /yql/essentials/tests/sql/suites/sampling | |
parent | 753ecb8d410a4cb459c26f3a0082fb2d1724fe63 (diff) | |
parent | a7b9a6afea2a9d7a7bfac4c5eb4c1a8e60adb9e6 (diff) | |
download | ydb-f76323e9b295c15751e51e3443aa47a36bee8023.tar.gz |
Merge pull request #11788 from ydb-platform/mergelibs-241120-1113
Library import 241120-1113
Diffstat (limited to 'yql/essentials/tests/sql/suites/sampling')
60 files changed, 462 insertions, 0 deletions
diff --git a/yql/essentials/tests/sql/suites/sampling/bind_default.sql b/yql/essentials/tests/sql/suites/sampling/bind_default.sql new file mode 100644 index 0000000000..c0f98c9dcc --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_default.sql @@ -0,0 +1,8 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +USE plato; + +$var = SELECT * FROM Input; + +SELECT * FROM $var TABLESAMPLE BERNOULLI(50); diff --git a/yql/essentials/tests/sql/suites/sampling/bind_expr.sql b/yql/essentials/tests/sql/suites/sampling/bind_expr.sql new file mode 100644 index 0000000000..e0820679f7 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_expr.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +USE plato; + +$count = SELECT COUNT(*) FROM Input; -- $count = 10 + +$var = SELECT * FROM Input; + +SELECT * FROM $var TABLESAMPLE BERNOULLI(5 * $count); diff --git a/yql/essentials/tests/sql/suites/sampling/bind_expr_subquery.sql b/yql/essentials/tests/sql/suites/sampling/bind_expr_subquery.sql new file mode 100644 index 0000000000..a734691f70 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_expr_subquery.sql @@ -0,0 +1,7 @@ +/* postgres can not */ +USE plato; + +$rc = (select count(*) from Input); +$sample_size = 10; + +select * from Input tablesample bernoulli(MIN_OF($sample_size * 100.0 / $rc, 100.0)) order by key; diff --git a/yql/essentials/tests/sql/suites/sampling/bind_expr_udf.cfg b/yql/essentials/tests/sql/suites/sampling/bind_expr_udf.cfg new file mode 100644 index 0000000000..7b2051c1bb --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_expr_udf.cfg @@ -0,0 +1,3 @@ +in Input input.txt +udf math_udf +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/bind_expr_udf.sql b/yql/essentials/tests/sql/suites/sampling/bind_expr_udf.sql new file mode 100644 index 0000000000..4a5bbf945b --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_expr_udf.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +/* postgres can not */ +USE plato; + +$percent = Math::Ceil(0.2); + +SELECT * FROM Input TABLESAMPLE BERNOULLI(Math::Ceil(100 * $percent)) ORDER BY key; -- 100% sample diff --git a/yql/essentials/tests/sql/suites/sampling/bind_join_left.sql b/yql/essentials/tests/sql/suites/sampling/bind_join_left.sql new file mode 100644 index 0000000000..8e1a9d863f --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_join_left.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +/* postgres can not */ +/* hybridfile can not YQL-17764 */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 3 */ + +use plato; +pragma DisableSimpleColumns; + +$a = select * from Input where key > "199" and value != "bbb"; + +select * from (select a.value, b.value from $a as a inner join Input as b using(subkey)) tablesample bernoulli(25); diff --git a/yql/essentials/tests/sql/suites/sampling/bind_join_right.sql b/yql/essentials/tests/sql/suites/sampling/bind_join_right.sql new file mode 100644 index 0000000000..b044495e84 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_join_right.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +/* postgres can not */ +/* hybridfile can not YQL-17764 */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 3 */ + +use plato; +pragma DisableSimpleColumns; + +$a = select * from Input where key > "199" and value != "bbb"; + +select * from (select a.value from Input as a inner join $a as b using(subkey)) tablesample bernoulli(40); diff --git a/yql/essentials/tests/sql/suites/sampling/bind_multiple_sample.sql b/yql/essentials/tests/sql/suites/sampling/bind_multiple_sample.sql new file mode 100644 index 0000000000..db9f9cfa16 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_multiple_sample.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 20 */ +USE plato; + +$var = SELECT * FROM Input; + +INSERT INTO @tmp +SELECT * FROM $var TABLESAMPLE BERNOULLI(100); + +INSERT INTO @tmp +SELECT * FROM $var TABLESAMPLE BERNOULLI(50); + +COMMIT; + +SELECT * FROM @tmp; diff --git a/yql/essentials/tests/sql/suites/sampling/bind_small_rate.sql b/yql/essentials/tests/sql/suites/sampling/bind_small_rate.sql new file mode 100644 index 0000000000..388857feb3 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_small_rate.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 2 */ + +$a = select * from plato.Input; + +select * from $a tablesample bernoulli(0.1) diff --git a/yql/essentials/tests/sql/suites/sampling/bind_topsort.sql b/yql/essentials/tests/sql/suites/sampling/bind_topsort.sql new file mode 100644 index 0000000000..781dd8d100 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/bind_topsort.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ + +$var = SELECT key, value FROM plato.Input; + +SELECT * FROM $var TABLESAMPLE BERNOULLI(10) ORDER BY key ASC, value LIMIT 10; diff --git a/yql/essentials/tests/sql/suites/sampling/default.cfg b/yql/essentials/tests/sql/suites/sampling/default.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/default.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/direct_read-dynamic.cfg b/yql/essentials/tests/sql/suites/sampling/direct_read-dynamic.cfg new file mode 100644 index 0000000000..1908d503d1 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/direct_read-dynamic.cfg @@ -0,0 +1,2 @@ +in Input dynamic.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/direct_read.cfg b/yql/essentials/tests/sql/suites/sampling/direct_read.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/direct_read.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/direct_read.sql b/yql/essentials/tests/sql/suites/sampling/direct_read.sql new file mode 100644 index 0000000000..5cfebc5a9f --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/direct_read.sql @@ -0,0 +1,4 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +pragma direct_read; +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1); diff --git a/yql/essentials/tests/sql/suites/sampling/dynamic.txt b/yql/essentials/tests/sql/suites/sampling/dynamic.txt new file mode 100644 index 0000000000..b214aab0d9 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/dynamic.txt @@ -0,0 +1,10 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; +{"key"="150";"subkey"="3";"value"="iii"}; +{"key"="150";"subkey"="8";"value"="zzz"}; +{"key"="200";"subkey"="7";"value"="qqq"}; +{"key"="527";"subkey"="4";"value"="bbb"}; +{"key"="761";"subkey"="6";"value"="ccc"}; +{"key"="911";"subkey"="2";"value"="kkk"}; diff --git a/yql/essentials/tests/sql/suites/sampling/dynamic.txt.attr b/yql/essentials/tests/sql/suites/sampling/dynamic.txt.attr new file mode 100644 index 0000000000..9e3c33c4cb --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/dynamic.txt.attr @@ -0,0 +1,33 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key"; ["DataType"; "String"]]; + ["subkey"; ["DataType"; "String"]]; + ["value"; ["DataType"; "String"]]; + ]]; + "SortMembers"=["key"; "subkey"]; + "SortedBy"=["key"; "subkey"]; + "SortDirections"=[1; 1]; + "SortedByTypes"=[["DataType";"String"]; ["DataType";"String"]] + }; + "_yql_dynamic"=%true; + "schema"=< + "strict" = %true; + "unique_keys" = %true + >[ + { + "name" = "key"; + "type" = "string"; + "sort_order" = "ascending"; + }; + { + "name" = "subkey"; + "type" = "string"; + "sort_order" = "ascending"; + }; + { + "name" = "value"; + "type" = "string"; + }; + ] +}
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/sampling/input.txt b/yql/essentials/tests/sql/suites/sampling/input.txt new file mode 100644 index 0000000000..9e2c7c8718 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/input.txt @@ -0,0 +1,10 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; +{"key"="160";"subkey"="3";"value"="iii"}; +{"key"="170";"subkey"="8";"value"="zzz"}; +{"key"="200";"subkey"="7";"value"="qqq"}; +{"key"="527";"subkey"="4";"value"="bbb"}; +{"key"="761";"subkey"="6";"value"="ccc"}; +{"key"="911";"subkey"="2";"value"="kkk"}; diff --git a/yql/essentials/tests/sql/suites/sampling/input1.txt b/yql/essentials/tests/sql/suites/sampling/input1.txt new file mode 100644 index 0000000000..2a8e728cae --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/input1.txt @@ -0,0 +1,4 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; diff --git a/yql/essentials/tests/sql/suites/sampling/insert.cfg b/yql/essentials/tests/sql/suites/sampling/insert.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/insert.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/insert.sql b/yql/essentials/tests/sql/suites/sampling/insert.sql new file mode 100644 index 0000000000..0d67d7dbeb --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/insert.sql @@ -0,0 +1,10 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +USE plato; + +INSERT INTO @tmp +SELECT * FROM Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1); + +COMMIT; + +SELECT * FROM @tmp; diff --git a/yql/essentials/tests/sql/suites/sampling/join_left_sample.sql b/yql/essentials/tests/sql/suites/sampling/join_left_sample.sql new file mode 100644 index 0000000000..a7a00d57df --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/join_left_sample.sql @@ -0,0 +1,9 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +/* ignore plan diff */ +use plato; +pragma DisableSimpleColumns; + +select * from plato.Input as a SAMPLE 0.3 +inner join plato.Input as b +on a.key = b.key; diff --git a/yql/essentials/tests/sql/suites/sampling/join_right_sample.sql b/yql/essentials/tests/sql/suites/sampling/join_right_sample.sql new file mode 100644 index 0000000000..bc61dc247e --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/join_right_sample.sql @@ -0,0 +1,9 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +/* syntax version 1 */ +use plato; +pragma DisableSimpleColumns; + +select * from plato.Input as a +inner join plato.Input as b SAMPLE 0.3 +on a.key = b.key; diff --git a/yql/essentials/tests/sql/suites/sampling/map-dynamic.cfg b/yql/essentials/tests/sql/suites/sampling/map-dynamic.cfg new file mode 100644 index 0000000000..1908d503d1 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/map-dynamic.cfg @@ -0,0 +1,2 @@ +in Input dynamic.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/map-keyfilter.cfg b/yql/essentials/tests/sql/suites/sampling/map-keyfilter.cfg new file mode 100644 index 0000000000..3f5bebb5a2 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/map-keyfilter.cfg @@ -0,0 +1,2 @@ +in Input sorted.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/map.cfg b/yql/essentials/tests/sql/suites/sampling/map.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/map.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/map.sql b/yql/essentials/tests/sql/suites/sampling/map.sql new file mode 100644 index 0000000000..cb2ddc5ff4 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/map.sql @@ -0,0 +1,3 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 8 */ +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1) WHERE subkey > "1"; diff --git a/yql/essentials/tests/sql/suites/sampling/mapjoin_left_sample.sql b/yql/essentials/tests/sql/suites/sampling/mapjoin_left_sample.sql new file mode 100644 index 0000000000..d2a55e6993 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/mapjoin_left_sample.sql @@ -0,0 +1,9 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +use plato; +pragma DisableSimpleColumns; +pragma yt.MapJoinLimit="1m"; + +select * from plato.Input as a SAMPLE 0.3 +inner join plato.Input as b +on a.key = b.key; diff --git a/yql/essentials/tests/sql/suites/sampling/mapjoin_right_sample.sql b/yql/essentials/tests/sql/suites/sampling/mapjoin_right_sample.sql new file mode 100644 index 0000000000..43f5dd455f --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/mapjoin_right_sample.sql @@ -0,0 +1,10 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +/* syntax version 1 */ +use plato; +pragma yt.MapJoinLimit="1m"; +pragma DisableSimpleColumns; + +select * from plato.Input as a +inner join plato.Input as b SAMPLE 0.3 +on a.key = b.key; diff --git a/yql/essentials/tests/sql/suites/sampling/orderedjoin_left_sample.sql b/yql/essentials/tests/sql/suites/sampling/orderedjoin_left_sample.sql new file mode 100644 index 0000000000..e38ecc0d0c --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/orderedjoin_left_sample.sql @@ -0,0 +1,11 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +/* ignore plan diff */ +/* syntax version 1 */ +use plato; +pragma yt.JoinMergeTablesLimit="2"; +pragma DisableSimpleColumns; + +select * from plato.Input as a SAMPLE 0.3 +inner join plato.Input as b +on a.key = b.key; diff --git a/yql/essentials/tests/sql/suites/sampling/orderedjoin_right_sample.sql b/yql/essentials/tests/sql/suites/sampling/orderedjoin_right_sample.sql new file mode 100644 index 0000000000..001e283204 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/orderedjoin_right_sample.sql @@ -0,0 +1,11 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +/* ignore plan diff */ +/* syntax version 1 */ +use plato; +pragma yt.JoinMergeTablesLimit="2"; +pragma DisableSimpleColumns; + +select * from plato.Input as a +inner join plato.Input as b SAMPLE 0.3 +on a.key = b.key; diff --git a/yql/essentials/tests/sql/suites/sampling/read-dynamic.cfg b/yql/essentials/tests/sql/suites/sampling/read-dynamic.cfg new file mode 100644 index 0000000000..1908d503d1 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/read-dynamic.cfg @@ -0,0 +1,2 @@ +in Input dynamic.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/read.cfg b/yql/essentials/tests/sql/suites/sampling/read.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/read.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/read.sql b/yql/essentials/tests/sql/suites/sampling/read.sql new file mode 100644 index 0000000000..1fe51fd9e7 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/read.sql @@ -0,0 +1,3 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1); diff --git a/yql/essentials/tests/sql/suites/sampling/reduce-with_premap.cfg b/yql/essentials/tests/sql/suites/sampling/reduce-with_premap.cfg new file mode 100644 index 0000000000..c85115a8e4 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/reduce-with_premap.cfg @@ -0,0 +1,2 @@ +in Input sorted_desc.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/reduce.cfg b/yql/essentials/tests/sql/suites/sampling/reduce.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/reduce.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/reduce.sql b/yql/essentials/tests/sql/suites/sampling/reduce.sql new file mode 100644 index 0000000000..0f72b64380 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/reduce.sql @@ -0,0 +1,11 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 8 */ +USE plato; + +$udf = YQL::@@(lambda '(key stream) (AsStruct + '('key key) '('summ (Collect (Condense stream (Uint32 '0) (lambda '(item state) (Bool 'False)) (lambda '(item state) (Add state item))))) +))@@; + +$res = (REDUCE Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1) ON key USING $udf(cast(value as uint32) ?? 0)); + +select * from $res order by key; diff --git a/yql/essentials/tests/sql/suites/sampling/reduce_with_presort.sql b/yql/essentials/tests/sql/suites/sampling/reduce_with_presort.sql new file mode 100644 index 0000000000..cfdb444c16 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/reduce_with_presort.sql @@ -0,0 +1,11 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 8 */ +USE plato; + +$udf = YQL::@@(lambda '(key stream) (AsStruct + '('key key) '('summ (Collect (Condense stream (Uint32 '0) (lambda '(item state) (Bool 'False)) (lambda '(item state) (Add state item))))) +))@@; + +$res = (REDUCE Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1) PRESORT key || subkey ON key USING $udf(cast(value as uint32) ?? 0)); + +select * from $res order by key; diff --git a/yql/essentials/tests/sql/suites/sampling/sample.sql b/yql/essentials/tests/sql/suites/sampling/sample.sql new file mode 100644 index 0000000000..6dd6d7ee31 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/sample.sql @@ -0,0 +1,5 @@ +USE plato; + +SELECT * FROM (SELECT* from Input) +SAMPLE(0.5) +ORDER BY key; diff --git a/yql/essentials/tests/sql/suites/sampling/sort.sql b/yql/essentials/tests/sql/suites/sampling/sort.sql new file mode 100644 index 0000000000..98ea792894 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/sort.sql @@ -0,0 +1,3 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1) ORDER BY key;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/sampling/sorted.txt b/yql/essentials/tests/sql/suites/sampling/sorted.txt new file mode 100644 index 0000000000..ebb866c3d6 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/sorted.txt @@ -0,0 +1,10 @@ +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; +{"key"="911";"subkey"="2";"value"="kkk"}; +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="150";"subkey"="3";"value"="iii"}; +{"key"="527";"subkey"="4";"value"="bbb"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="761";"subkey"="6";"value"="ccc"}; +{"key"="200";"subkey"="7";"value"="qqq"}; +{"key"="150";"subkey"="8";"value"="zzz"}; diff --git a/yql/essentials/tests/sql/suites/sampling/sorted.txt.attr b/yql/essentials/tests/sql/suites/sampling/sorted.txt.attr new file mode 100644 index 0000000000..1d2e02ee20 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/sorted.txt.attr @@ -0,0 +1,13 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key"; ["DataType"; "String"]]; + ["subkey"; ["DataType"; "String"]]; + ["value"; ["DataType"; "String"]]; + ]]; + "SortMembers"=["subkey"]; + "SortedBy"=["subkey"]; + "SortDirections"=[1]; + "SortedByTypes"=[["DataType";"String"]] + }; +} diff --git a/yql/essentials/tests/sql/suites/sampling/sorted_desc.txt b/yql/essentials/tests/sql/suites/sampling/sorted_desc.txt new file mode 100644 index 0000000000..235fc7cd49 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/sorted_desc.txt @@ -0,0 +1,10 @@ +{"_yql_column_0"="\xE0\xC6\xCE\xCE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="911";"subkey"="2";"value"="kkk"}; +{"_yql_column_0"="\xE0\xC8\xC9\xCE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="761";"subkey"="6";"value"="ccc"}; +{"_yql_column_0"="\xE0\xCA\xCD\xC8\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="527";"subkey"="4";"value"="bbb"}; +{"_yql_column_0"="\xE0\xCD\xCF\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="200";"subkey"="7";"value"="qqq"}; +{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="1";"value"="aaa"}; +{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="3";"value"="iii"}; +{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="8";"value"="zzz"}; +{"_yql_column_0"="\xE0\xCF\xC8\xCA\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="075";"subkey"="1";"value"="abc"}; +{"_yql_column_0"="\xE0\xCF\xCC\xC8\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="037";"subkey"="5";"value"="ddd"}; +{"_yql_column_0"="\xE0\xCF\xCD\xCC\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="023";"subkey"="3";"value"="aaa"}; diff --git a/yql/essentials/tests/sql/suites/sampling/sorted_desc.txt.attr b/yql/essentials/tests/sql/suites/sampling/sorted_desc.txt.attr new file mode 100644 index 0000000000..5a6ca5761e --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/sorted_desc.txt.attr @@ -0,0 +1,47 @@ +{ + "_yql_row_spec" = { + "SortMembers" = [ + "key" + ]; + "SortDirections" = [ + 0 + ]; + "UniqueKeys" = %false; + "SortedByTypes" = [ + [ + "DataType"; + "String" + ] + ]; + "StrictSchema" = %true; + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ]; + "SortedBy" = [ + "_yql_column_0" + ] + } +}
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_default.sql b/yql/essentials/tests/sql/suites/sampling/subquery_default.sql new file mode 100644 index 0000000000..60e9adf0f5 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_default.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ + +SELECT * FROM (SELECT * FROM plato.Input) TABLESAMPLE BERNOULLI(50); diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_expr.sql b/yql/essentials/tests/sql/suites/sampling/subquery_expr.sql new file mode 100644 index 0000000000..e9c339b728 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_expr.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ + +$count = SELECT COUNT(*) FROM plato.Input; -- $count = 10 + +SELECT * FROM (SELECT * FROM plato.Input) TABLESAMPLE BERNOULLI(5 * $count); diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_filter.sql b/yql/essentials/tests/sql/suites/sampling/subquery_filter.sql new file mode 100644 index 0000000000..54c30b5787 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_filter.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 8 */ + +select * from (select key from plato.Input where subkey != "1") tablesample bernoulli(44) where key > "50"; diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_limit.sql b/yql/essentials/tests/sql/suites/sampling/subquery_limit.sql new file mode 100644 index 0000000000..9d6d945b11 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_limit.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 6 */ + +select * from (select * from plato.Input) tablesample bernoulli(80) limit 5; diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_mapjoin.sql b/yql/essentials/tests/sql/suites/sampling/subquery_mapjoin.sql new file mode 100644 index 0000000000..bd7c425a8e --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_mapjoin.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +/* postgres can not */ +/* hybridfile can not YQL-17764 */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +use plato; + +pragma DisableSimpleColumns; +pragma yt.MapJoinLimit="1m"; + +select * from( +select * from plato.Input as a +inner join plato.Input as b +on a.key = b.key +) tablesample bernoulli(30); diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_multiple_sample.sql b/yql/essentials/tests/sql/suites/sampling/subquery_multiple_sample.sql new file mode 100644 index 0000000000..bc46b5a3eb --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_multiple_sample.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ + +SELECT * FROM (SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(50)) TABLESAMPLE BERNOULLI(30); diff --git a/yql/essentials/tests/sql/suites/sampling/subquery_sort.sql b/yql/essentials/tests/sql/suites/sampling/subquery_sort.sql new file mode 100644 index 0000000000..d322a3822f --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/subquery_sort.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ + +select key from (select key, value from plato.Input) tablesample bernoulli(33) order by key; + diff --git a/yql/essentials/tests/sql/suites/sampling/system_sampling-io_block_size.cfg b/yql/essentials/tests/sql/suites/sampling/system_sampling-io_block_size.cfg new file mode 100644 index 0000000000..7544cd7111 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/system_sampling-io_block_size.cfg @@ -0,0 +1,5 @@ +in Input input.txt +out Output output.txt +res result.txt +pragma yt.SamplingIoBlockSize="32M" +providers yt diff --git a/yql/essentials/tests/sql/suites/sampling/system_sampling.cfg b/yql/essentials/tests/sql/suites/sampling/system_sampling.cfg new file mode 100644 index 0000000000..5d963ebb21 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/system_sampling.cfg @@ -0,0 +1,4 @@ +in Input input.txt +out Output output.txt +res result.txt +providers yt diff --git a/yql/essentials/tests/sql/suites/sampling/system_sampling.sql b/yql/essentials/tests/sql/suites/sampling/system_sampling.sql new file mode 100644 index 0000000000..edc2038653 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/system_sampling.sql @@ -0,0 +1,12 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 100 */ +/* syntax version 1 */ +USE plato; + +EVALUATE FOR $_i IN ListFromRange(0, 10) DO BEGIN + INSERT INTO Output + SELECT * FROM Input; + COMMIT; +END DO; + +SELECT * FROM Output TABLESAMPLE SYSTEM(10); diff --git a/yql/essentials/tests/sql/suites/sampling/table_content.cfg b/yql/essentials/tests/sql/suites/sampling/table_content.cfg new file mode 100644 index 0000000000..6f99339756 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/table_content.cfg @@ -0,0 +1,2 @@ +in Input input.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/table_content.sql b/yql/essentials/tests/sql/suites/sampling/table_content.sql new file mode 100644 index 0000000000..b0aff15615 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/table_content.sql @@ -0,0 +1,7 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) == 1 */ +use plato; + +$key = (select key from plato.Input SAMPLE(0.5)); + +select * from Input where key = $key; diff --git a/yql/essentials/tests/sql/suites/sampling/take_with_sampling.sql b/yql/essentials/tests/sql/suites/sampling/take_with_sampling.sql new file mode 100644 index 0000000000..123aabdbb0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/take_with_sampling.sql @@ -0,0 +1,3 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1) LIMIT 10;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/sampling/topsort.sql b/yql/essentials/tests/sql/suites/sampling/topsort.sql new file mode 100644 index 0000000000..1acbb70faa --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/topsort.sql @@ -0,0 +1,3 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(30) REPEATABLE(1) ORDER BY key LIMIT 10;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/sampling/yql-14664_deps.sql b/yql/essentials/tests/sql/suites/sampling/yql-14664_deps.sql new file mode 100644 index 0000000000..1ce0ee9463 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/yql-14664_deps.sql @@ -0,0 +1,20 @@ +/* postgres can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 10 */ +USE plato; + +INSERT INTO @a +SELECT + * +FROM Input +WHERE key > "020"; + +COMMIT; + +SELECT + * +FROM ( + SELECT + * + FROM @a + TABLESAMPLE BERNOULLI(50.0) REPEATABLE(1) +) LIMIT 10 diff --git a/yql/essentials/tests/sql/suites/sampling/zero_percentage.cfg b/yql/essentials/tests/sql/suites/sampling/zero_percentage.cfg new file mode 100644 index 0000000000..3f5bebb5a2 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/zero_percentage.cfg @@ -0,0 +1,2 @@ +in Input sorted.txt +res result.txt diff --git a/yql/essentials/tests/sql/suites/sampling/zero_percentage.sql b/yql/essentials/tests/sql/suites/sampling/zero_percentage.sql new file mode 100644 index 0000000000..43693ecd77 --- /dev/null +++ b/yql/essentials/tests/sql/suites/sampling/zero_percentage.sql @@ -0,0 +1,2 @@ +/* postgres can not */ +SELECT * FROM plato.Input TABLESAMPLE BERNOULLI(0) ORDER BY subkey; |