diff options
author | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2024-11-20 17:37:57 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-20 17:37:57 +0000 |
commit | f76323e9b295c15751e51e3443aa47a36bee8023 (patch) | |
tree | 4113c8cad473a33e0f746966e0cf087252fa1d7a /yql/essentials/tests/sql/suites/hor_join | |
parent | 753ecb8d410a4cb459c26f3a0082fb2d1724fe63 (diff) | |
parent | a7b9a6afea2a9d7a7bfac4c5eb4c1a8e60adb9e6 (diff) | |
download | ydb-f76323e9b295c15751e51e3443aa47a36bee8023.tar.gz |
Merge pull request #11788 from ydb-platform/mergelibs-241120-1113
Library import 241120-1113
Diffstat (limited to 'yql/essentials/tests/sql/suites/hor_join')
66 files changed, 755 insertions, 0 deletions
diff --git a/yql/essentials/tests/sql/suites/hor_join/default.cfg b/yql/essentials/tests/sql/suites/hor_join/default.cfg new file mode 100644 index 0000000000..58878f8945 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/default.cfg @@ -0,0 +1 @@ +in Input input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/double_input.sql b/yql/essentials/tests/sql/suites/hor_join/double_input.sql new file mode 100644 index 0000000000..33eb738f9d --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/double_input.sql @@ -0,0 +1,14 @@ +/* postgres can not */ +/* kikimr can not */ + +SELECT * FROM ( + SELECT key, value || "a" as value FROM plato.Input + union all + SELECT key, "1" as value from plato.Input + union all + SELECT key, "1" as value from plato.Input + union all + SELECT key, "3" as value from plato.Input +) AS x +ORDER BY key, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/empty_out_hor_join.sql b/yql/essentials/tests/sql/suites/hor_join/empty_out_hor_join.sql new file mode 100644 index 0000000000..347407a3be --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/empty_out_hor_join.sql @@ -0,0 +1,8 @@ +/* postgres can not */ + +USE plato; + +$i = (select * from Input where key == "0" order by key limit 100); + +select key, some(value) from $i group by key; +select key, some(subkey) from $i group by key; diff --git a/yql/essentials/tests/sql/suites/hor_join/filters.cfg b/yql/essentials/tests/sql/suites/hor_join/filters.cfg new file mode 100644 index 0000000000..9b1926e035 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/filters.cfg @@ -0,0 +1,2 @@ +in Input1 input.txt +in Input2 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/filters.sql b/yql/essentials/tests/sql/suites/hor_join/filters.sql new file mode 100644 index 0000000000..dfb4317e24 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/filters.sql @@ -0,0 +1,8 @@ +USE plato; + +SELECT * FROM ( + SELECT key, value FROM plato.Input1 where key > "010" + UNION ALL + SELECT key, value FROM plato.Input2 where key > "020" +) AS x ORDER BY key, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1-outlimit.cfg b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1-outlimit.cfg new file mode 100644 index 0000000000..35111685c0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1-outlimit.cfg @@ -0,0 +1,3 @@ +in Input input.txt +providers yt +pragma yt.MaxOutputTables="2" diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1.cfg b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1.cfg new file mode 100644 index 0000000000..1f1d2747db --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1.cfg @@ -0,0 +1,2 @@ +in Input input.txt +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1.sql b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1.sql new file mode 100644 index 0000000000..4846907f41 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs1.sql @@ -0,0 +1,41 @@ +use plato; + +$udf = YQL::@@(lambda '(flow) +(Map flow + (lambda '(item) (block '( + (let varTuple (VariantType (TupleType + (StructType + '('key (DataType 'String)) + '('subkey (DataType 'String)) + '('value (DataType 'String)) + ) + (StructType + '('key (DataType 'String)) + '('subkey (DataType 'String)) + '('value (DataType 'String)) + ) + ))) + (let intValue (FromString (Member item 'key) 'Int32)) + (let res + (If (Coalesce (Equal (% intValue (Int32 '2)) (Int32 '0)) (Bool 'false)) + (Variant item '0 varTuple) + (Variant item '1 varTuple) + ) + ) + (return res) + ))) +))@@; + +$i, $j = (PROCESS Input USING $udf(TableRows())); + +select key, value from $i where key > "100" +order by key; + +insert into @a +select * from $j; + +insert into @b +select key from $i where key > "200"; + +insert into @c +select key from $j where key > "300"; diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2-outlimit.cfg b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2-outlimit.cfg new file mode 100644 index 0000000000..35111685c0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2-outlimit.cfg @@ -0,0 +1,3 @@ +in Input input.txt +providers yt +pragma yt.MaxOutputTables="2" diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2.cfg b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2.cfg new file mode 100644 index 0000000000..1f1d2747db --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2.cfg @@ -0,0 +1,2 @@ +in Input input.txt +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2.sql b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2.sql new file mode 100644 index 0000000000..02de4aae7b --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_outs2.sql @@ -0,0 +1,38 @@ +use plato; + +$udf = YQL::@@(lambda '(flow) +(Map flow + (lambda '(item) (block '( + (let varTuple (VariantType (TupleType + (StructType + '('key (DataType 'String)) + '('subkey (DataType 'String)) + '('value (DataType 'String)) + ) + (StructType + '('key (DataType 'String)) + '('subkey (DataType 'String)) + '('value (DataType 'String)) + ) + ))) + (let intValue (FromString (Member item 'key) 'Int32)) + (let res + (If (Coalesce (Equal (% intValue (Int32 '2)) (Int32 '0)) (Bool 'false)) + (Variant item '0 varTuple) + (Variant item '1 varTuple) + ) + ) + (return res) + ))) +))@@; + +$i, $j = (PROCESS Input USING $udf(TableRows())); + +select key, value from $i where key > "100" +order by key; + +insert into @a +select * from $j; + +insert into @b +select key from $j where key > "200"; diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage-outlimit.cfg b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage-outlimit.cfg new file mode 100644 index 0000000000..35111685c0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage-outlimit.cfg @@ -0,0 +1,3 @@ +in Input input.txt +providers yt +pragma yt.MaxOutputTables="2" diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage.cfg b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage.cfg new file mode 100644 index 0000000000..1f1d2747db --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage.cfg @@ -0,0 +1,2 @@ +in Input input.txt +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage.sql b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage.sql new file mode 100644 index 0000000000..630c5285d0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/fuse_multi_usage.sql @@ -0,0 +1,12 @@ +USE plato; + +$input = select key, some(subkey) as subkey, some(value) as value from Input group by key; + +select key from $input where subkey > "0" +order by key; + +insert into @a +select t.*, RandomNumber(TableRow()) as rnd from $input as t where value > "a"; + +insert into @b +select * from $input;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/hor_join/group_ranges.cfg b/yql/essentials/tests/sql/suites/hor_join/group_ranges.cfg new file mode 100644 index 0000000000..c9c5c39ac1 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/group_ranges.cfg @@ -0,0 +1,4 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 input.txt +in Input4 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/group_ranges.sql b/yql/essentials/tests/sql/suites/hor_join/group_ranges.sql new file mode 100644 index 0000000000..25d8e78cd4 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/group_ranges.sql @@ -0,0 +1,13 @@ +/* postgres can not */ +/* kikimr can not */ +USE plato; + +$i = (SELECT * FROM plato.range(``, Input1, Input4)); + +SELECT * FROM ( + SELECT 1 as key, subkey, value FROM $i + UNION ALL + SELECT 2 as key, subkey, value FROM $i +) AS x +ORDER BY key, subkey, value +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/hor_join/group_sampling.cfg b/yql/essentials/tests/sql/suites/hor_join/group_sampling.cfg new file mode 100644 index 0000000000..c9c5c39ac1 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/group_sampling.cfg @@ -0,0 +1,4 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 input.txt +in Input4 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/group_sampling.sql b/yql/essentials/tests/sql/suites/hor_join/group_sampling.sql new file mode 100644 index 0000000000..255d3b5811 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/group_sampling.sql @@ -0,0 +1,17 @@ +/* postgres can not */ +/* kikimr can not */ +/* hybridfile can not YQL-17284 */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 16 */ +USE plato; + +SELECT * FROM ( + SELECT CAST(key AS int) as key, '' as subkey, '' as value FROM plato.Input1 SAMPLE 0.1 + UNION ALL + SELECT CAST(key AS int) as key, subkey, '' as value FROM plato.Input2 SAMPLE 0.1 + UNION ALL + SELECT 1 as key, subkey, '' as value from plato.Input3 + UNION ALL + SELECT 1 as key, '' as subkey, value from plato.Input4 +) AS x +ORDER BY key, subkey, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/group_yamr.cfg b/yql/essentials/tests/sql/suites/hor_join/group_yamr.cfg new file mode 100644 index 0000000000..280feec6cc --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/group_yamr.cfg @@ -0,0 +1,4 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 yamred_dsv.txt yamred_dsv +in Input4 yamred_dsv.txt yamred_dsv diff --git a/yql/essentials/tests/sql/suites/hor_join/group_yamr.sql b/yql/essentials/tests/sql/suites/hor_join/group_yamr.sql new file mode 100644 index 0000000000..d781971f1e --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/group_yamr.sql @@ -0,0 +1,13 @@ +USE plato; + +SELECT * FROM ( + SELECT 1 as key, subkey, '' as value from plato.Input1 + UNION ALL + SELECT 2 as key, '' as subkey, value from plato.Input2 + UNION ALL + SELECT 3 as key, subkey, '' as value from plato.Input3 + UNION ALL + SELECT 4 as key, '' as subkey, value from plato.Input4 +) AS x +ORDER BY key, subkey, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/input.txt b/yql/essentials/tests/sql/suites/hor_join/input.txt new file mode 100644 index 0000000000..65949ea745 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/input.txt @@ -0,0 +1,4 @@ +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="800";"subkey"="2";"value"="ddd"}; +{"key"="020";"subkey"="3";"value"="q"}; +{"key"="150";"subkey"="4";"value"="qzz"}; diff --git a/yql/essentials/tests/sql/suites/hor_join/input.txt.attr b/yql/essentials/tests/sql/suites/hor_join/input.txt.attr new file mode 100644 index 0000000000..b6100e5fd0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/input.txt.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +}
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/hor_join/input1.txt b/yql/essentials/tests/sql/suites/hor_join/input1.txt new file mode 100644 index 0000000000..9e2c7c8718 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/input1.txt @@ -0,0 +1,10 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; +{"key"="160";"subkey"="3";"value"="iii"}; +{"key"="170";"subkey"="8";"value"="zzz"}; +{"key"="200";"subkey"="7";"value"="qqq"}; +{"key"="527";"subkey"="4";"value"="bbb"}; +{"key"="761";"subkey"="6";"value"="ccc"}; +{"key"="911";"subkey"="2";"value"="kkk"}; diff --git a/yql/essentials/tests/sql/suites/hor_join/less_outs.cfg b/yql/essentials/tests/sql/suites/hor_join/less_outs.cfg new file mode 100644 index 0000000000..a06e20d35e --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/less_outs.cfg @@ -0,0 +1,7 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 input.txt +in Input4 input.txt +in Input5 input.txt +providers yt + diff --git a/yql/essentials/tests/sql/suites/hor_join/less_outs.sql b/yql/essentials/tests/sql/suites/hor_join/less_outs.sql new file mode 100644 index 0000000000..a8594fe2dd --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/less_outs.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +/* postgres can not */ + +$i1 = SELECT CAST(key AS Int32) ?? 0 as key, '' as value FROM plato.Input1; +$i2 = SELECT 0 as key, value from plato.Input2 UNION ALL SELECT 1 as key, value from plato.Input3; +$i3 = (SELECT 2 as key, value from plato.Input4 UNION ALL SELECT 3 as key, value from plato.Input5); + +$udf = ($x) -> { return Yql::VariantItem($x) }; + +PROCESS $i1, $i2, $i3 using $udf(TableRow()); diff --git a/yql/essentials/tests/sql/suites/hor_join/max_in_tables.cfg b/yql/essentials/tests/sql/suites/hor_join/max_in_tables.cfg new file mode 100644 index 0000000000..1d540ef27f --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/max_in_tables.cfg @@ -0,0 +1,5 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 input.txt +in Input4 input.txt +in Input5 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/max_in_tables.sql b/yql/essentials/tests/sql/suites/hor_join/max_in_tables.sql new file mode 100644 index 0000000000..684ceb32fc --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/max_in_tables.sql @@ -0,0 +1,17 @@ +/* postgres can not */ +/* kikimr can not */ +pragma yt.MaxInputTables="3"; + +SELECT * FROM ( + SELECT CAST(key AS int) as key, '' as value FROM plato.Input1 + UNION ALL + SELECT 0 as key, value from plato.Input2 + UNION ALL + SELECT 1 as key, value from plato.Input3 + UNION ALL + SELECT 2 as key, value from plato.Input4 + UNION ALL + SELECT 3 as key, value from plato.Input5 +) AS x +ORDER BY key, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/max_outtables.cfg b/yql/essentials/tests/sql/suites/hor_join/max_outtables.cfg new file mode 100644 index 0000000000..a06e20d35e --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/max_outtables.cfg @@ -0,0 +1,7 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 input.txt +in Input4 input.txt +in Input5 input.txt +providers yt + diff --git a/yql/essentials/tests/sql/suites/hor_join/max_outtables.sql b/yql/essentials/tests/sql/suites/hor_join/max_outtables.sql new file mode 100644 index 0000000000..0ce459d466 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/max_outtables.sql @@ -0,0 +1,15 @@ +/* syntax version 1 */ +/* postgres can not */ +/* kikimr can not */ +pragma yt.MaxOutputTables="3"; + +$i1 = (SELECT CAST(key AS Int32) ?? 0 as key, '' as value FROM plato.Input1); +$i2 = (SELECT 0 as key, value from plato.Input2); +$i3 = (SELECT 1 as key, value from plato.Input3); +$i4 = (SELECT 2 as key, value from plato.Input4); +$i5 = (SELECT 3 as key, value from plato.Input5); + + +$udf = ($x) -> { return Yql::VariantItem($x) }; + +PROCESS $i1, $i2, $i3, $i4, $i5 using $udf(TableRow()); diff --git a/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_all.cfg b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_all.cfg new file mode 100644 index 0000000000..b9e77a0bd0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_all.cfg @@ -0,0 +1,5 @@ +in Input input.txt +out Output output.txt +res result.txt +udf python2_udf +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_all.sql b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_all.sql new file mode 100644 index 0000000000..f413af69ee --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_all.sql @@ -0,0 +1,43 @@ +use plato; + +/* syntax version 1 */ +/* postgres can not */ +$udfScript = @@ +def MyFunc(list): + return [(int(x.key) % 4, x) for x in list] +@@; + +$record = (SELECT TableRow() FROM Input); +$recordType =TypeOf(Unwrap($record)); + +$udf = Python::MyFunc( + CallableType(0, + StreamType( + VariantType(TupleType($recordType, $recordType, $recordType, $recordType)) + ), + StreamType($recordType)), + $udfScript +); + +$i0, $i1, $i2, $i3 = (PROCESS Input USING $udf(TableRows())); + +select * from ( + select * from $i0 + union all + select * from $i1 + union all + select * from $i2 + union all + select * from $i3 +) order by key; + +insert into Output +select * from ( + select * from $i0 + union all + select * from $i1 + union all + select * from $i2 + union all + select * from $i3 +); diff --git a/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_part.cfg b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_part.cfg new file mode 100644 index 0000000000..b9e77a0bd0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_part.cfg @@ -0,0 +1,5 @@ +in Input input.txt +out Output output.txt +res result.txt +udf python2_udf +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_part.sql b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_part.sql new file mode 100644 index 0000000000..640fd4a086 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_part.sql @@ -0,0 +1,39 @@ +use plato; + +/* syntax version 1 */ +/* postgres can not */ +$udfScript = @@ +def MyFunc(list): + return [(int(x.key) % 4, x) for x in list] +@@; + +$record = (SELECT TableRow() FROM plato.Input); +$recordType =TypeOf(Unwrap($record)); + +$udf = Python::MyFunc( + CallableType(0, + StreamType( + VariantType(TupleType($recordType, $recordType, $recordType, $recordType)) + ), + StreamType($recordType)), + $udfScript +); + +$i0, $i1, $i2, $i3 = (PROCESS plato.Input USING $udf(TableRows())); + +select * from ( + select * from $i0 + union all + select * from $i1 + union all + select * from $i2 +) order by key; + +insert into Output +select * from ( + select * from $i1 + union all + select * from $i2 + union all + select * from $i3 +); diff --git a/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_reuse.cfg b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_reuse.cfg new file mode 100644 index 0000000000..b9e77a0bd0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_reuse.cfg @@ -0,0 +1,5 @@ +in Input input.txt +out Output output.txt +res result.txt +udf python2_udf +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_reuse.sql b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_reuse.sql new file mode 100644 index 0000000000..d0068e031c --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/merge_multiouts_reuse.sql @@ -0,0 +1,43 @@ +use plato; + +/* syntax version 1 */ +/* postgres can not */ +$udfScript = @@ +def MyFunc(list): + return [(int(x.key) % 4, x) for x in list] +@@; + +$record = (SELECT TableRow() FROM Input); +$recordType =TypeOf(Unwrap($record)); + +$udf = Python::MyFunc( + CallableType(0, + StreamType( + VariantType(TupleType($recordType, $recordType, $recordType, $recordType)) + ), + StreamType($recordType)), + $udfScript +); + +$i0, $i1, $i2, $i3 = (PROCESS Input USING $udf(TableRows())); + +select * from ( + select * from $i0 + union all + select * from $i1 + union all + select * from $i2 + union all + select * from $i0 +) order by key; + +insert into Output +select * from ( + select * from $i3 + union all + select * from $i1 + union all + select * from $i2 + union all + select * from $i3 +); diff --git a/yql/essentials/tests/sql/suites/hor_join/nonstrict.txt b/yql/essentials/tests/sql/suites/hor_join/nonstrict.txt new file mode 100644 index 0000000000..42c5393923 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/nonstrict.txt @@ -0,0 +1,4 @@ +{"key"="020";"subkey"="3";"value1"="q"}; +{"key"="075";"subkey"="1";"value1"="abc"}; +{"key"="150";"subkey"="4";"value1"="qzz"}; +{"key"="800";"subkey"="2";"value1"="ddd"}; diff --git a/yql/essentials/tests/sql/suites/hor_join/nonstrict.txt.attr b/yql/essentials/tests/sql/suites/hor_join/nonstrict.txt.attr new file mode 100644 index 0000000000..49d98771a8 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/nonstrict.txt.attr @@ -0,0 +1,17 @@ +{ + "_read_schema" = <"strict" = %false>[ + {"type"="string";"name"="key"}; + {"type"="string";"name"="subkey"}; + ]; + "schema" = <"strict" = %false; "unique_keys" = %false> + [ + { + "name" = "key"; + "type" = "string" + }; + { + "name" = "subkey"; + "type" = "string" + } + ] +} diff --git a/yql/essentials/tests/sql/suites/hor_join/out_hor_join.sql b/yql/essentials/tests/sql/suites/hor_join/out_hor_join.sql new file mode 100644 index 0000000000..0e410fbb55 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_hor_join.sql @@ -0,0 +1,10 @@ +/* postgres can not */ +USE plato; + +$i = (SELECT * FROM Input WHERE key < "900"); + +SELECT key, some(value) as s FROM $i GROUP BY key ORDER BY key, s; + +SELECT key, sum(cast(subkey as Int32)) as s FROM $i GROUP BY key ORDER BY key, s; + +SELECT key, some(subkey) as s FROM $i GROUP BY key ORDER BY key, s; diff --git a/yql/essentials/tests/sql/suites/hor_join/out_max_outtables.sql b/yql/essentials/tests/sql/suites/hor_join/out_max_outtables.sql new file mode 100644 index 0000000000..399028196d --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_max_outtables.sql @@ -0,0 +1,18 @@ +/* postgres can not */ +/* kikimr can not */ +pragma yt.MaxOutputTables="3"; +pragma yt.DisableOptimizers="HorizontalJoin,MultiHorizontalJoin"; + +SELECT * FROM ( + SELECT CAST(key AS int) as key, '' as value FROM plato.Input + UNION ALL + SELECT 0 as key, value from plato.Input + UNION ALL + SELECT 1 as key, value from plato.Input + UNION ALL + SELECT 2 as key, value from plato.Input + UNION ALL + SELECT 3 as key, value from plato.Input +) AS x +ORDER BY key, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/out_mem_limit.sql b/yql/essentials/tests/sql/suites/hor_join/out_mem_limit.sql new file mode 100644 index 0000000000..508ddfd6cd --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_mem_limit.sql @@ -0,0 +1,14 @@ +/* postgres can not */ +/* kikimr can not - yt pragma */ +USE plato; + +PRAGMA yt.MaxExtraJobMemoryToFuseOperations="550m"; +PRAGMA yt.CombineCoreLimit="128m"; + +$i = (SELECT * FROM Input WHERE key < "900"); + +SELECT key, sum(cast(subkey as Int32)) as s FROM $i GROUP BY key ORDER BY key, s; + +SELECT key, some(subkey) as s FROM $i GROUP BY key ORDER by key, s; + +SELECT key, some(value) as s FROM $i GROUP BY key ORDER BY key, s; diff --git a/yql/essentials/tests/sql/suites/hor_join/out_range.sql b/yql/essentials/tests/sql/suites/hor_join/out_range.sql new file mode 100644 index 0000000000..c734fd9441 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_range.sql @@ -0,0 +1,8 @@ +/* postgres can not */ +USE plato; + +SELECT key, some(value) as s FROM Input GROUP BY key ORDER BY key, s; + +SELECT key, sum(cast(subkey as Int32)) as s FROM Input WHERE key > "100" GROUP BY key ORDER BY key, s; + +SELECT key, some(subkey) as s FROM Input WHERE key > "100" GROUP BY key ORDER BY key, s; diff --git a/yql/essentials/tests/sql/suites/hor_join/out_sampling.cfg b/yql/essentials/tests/sql/suites/hor_join/out_sampling.cfg new file mode 100644 index 0000000000..a654f9117d --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_sampling.cfg @@ -0,0 +1 @@ +in Input input1.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/out_sampling.sql b/yql/essentials/tests/sql/suites/hor_join/out_sampling.sql new file mode 100644 index 0000000000..7619a391fc --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_sampling.sql @@ -0,0 +1,12 @@ +/* postgres can not */ +/* kikimr can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) == 10 and len(yt_res_yson[1]['Write'][0]['Data']) < 10 and len(yt_res_yson[2]['Write'][0]['Data']) < 10 and len(yt_res_yson[3]['Write'][0]['Data']) == 10 */ +USE plato; + +SELECT key, some(value) FROM Input GROUP BY key; + +SELECT key, sum(cast(subkey as Int32)) FROM Input SAMPLE 0.3 GROUP BY key; + +SELECT key, some(subkey) FROM Input SAMPLE 0.3 GROUP BY key; + +SELECT key, sum(length(value)) FROM Input GROUP BY key; diff --git a/yql/essentials/tests/sql/suites/hor_join/out_table_record.sql b/yql/essentials/tests/sql/suites/hor_join/out_table_record.sql new file mode 100644 index 0000000000..25cc1dcc62 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/out_table_record.sql @@ -0,0 +1,13 @@ +/* syntax version 1 */ +/* postgres can not */ +USE plato; + +pragma yt.DisableOptimizers="HorizontalJoin,MultiHorizontalJoin"; + +SELECT * FROM ( + select key, TableRecordIndex() as record, TablePath() as path from Input + union all + select key, TableRecordIndex() as record, "d" as path from Input +) AS x +ORDER BY key, record, path +; diff --git a/yql/essentials/tests/sql/suites/hor_join/row_num_per_sect.cfg b/yql/essentials/tests/sql/suites/hor_join/row_num_per_sect.cfg new file mode 100644 index 0000000000..2d1eb20046 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/row_num_per_sect.cfg @@ -0,0 +1,2 @@ +in Input1 nonstrict.txt +in Input2 input.txt
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/hor_join/row_num_per_sect.sql b/yql/essentials/tests/sql/suites/hor_join/row_num_per_sect.sql new file mode 100644 index 0000000000..e3477cc693 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/row_num_per_sect.sql @@ -0,0 +1,9 @@ +/* postgres can not */ +/* kikimr can not */ +/* syntax version 1 */ +USE plato; + +$with_row1 = (SELECT t.*, ROW_NUMBER() OVER () as row_num FROM Input1 as t); +$with_row2 = (SELECT t.*, ROW_NUMBER() OVER () as row_num FROM Input2 as t); + +SELECT a.key as key, b.subkey as subkey, b.value as value FROM $with_row1 as a LEFT JOIN $with_row2 as b USING(row_num); diff --git a/yql/essentials/tests/sql/suites/hor_join/runtime_dep.sql b/yql/essentials/tests/sql/suites/hor_join/runtime_dep.sql new file mode 100644 index 0000000000..183635ba58 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/runtime_dep.sql @@ -0,0 +1,15 @@ +/* postgres can not */ +use plato; + +INSERT INTO @ttt WITH TRUNCATE +SELECT CAST(key AS int) as key, subkey, value FROM Input; + +COMMIT; + +SELECT * FROM ( + SELECT key, '' as value FROM @ttt + UNION ALL + SELECT 0 as key, value from @ttt +) AS x +ORDER BY key, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/skip_sampling.cfg b/yql/essentials/tests/sql/suites/hor_join/skip_sampling.cfg new file mode 100644 index 0000000000..15b84c0426 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/skip_sampling.cfg @@ -0,0 +1,3 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/skip_sampling.sql b/yql/essentials/tests/sql/suites/hor_join/skip_sampling.sql new file mode 100644 index 0000000000..c00c6f9627 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/skip_sampling.sql @@ -0,0 +1,14 @@ +/* postgres can not */ +/* kikimr can not */ +/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 12 */ +USE plato; + +SELECT * FROM ( + SELECT CAST(key AS int) as key, '' as subkey, '' as value FROM plato.Input1 SAMPLE 0.1 + UNION ALL + SELECT 1 as key, subkey, '' as value from plato.Input2 + UNION ALL + SELECT 1 as key, '' as subkey, value from plato.Input3 +) AS x +ORDER BY key, subkey, value +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/hor_join/skip_yamr.cfg b/yql/essentials/tests/sql/suites/hor_join/skip_yamr.cfg new file mode 100644 index 0000000000..d16bb7a691 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/skip_yamr.cfg @@ -0,0 +1,3 @@ +in Input1 input.txt +in Input2 input.txt +in Input3 yamred_dsv.txt yamred_dsv diff --git a/yql/essentials/tests/sql/suites/hor_join/skip_yamr.sql b/yql/essentials/tests/sql/suites/hor_join/skip_yamr.sql new file mode 100644 index 0000000000..8161e83e83 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/skip_yamr.sql @@ -0,0 +1,10 @@ +USE plato; + +SELECT * FROM ( + SELECT 1 as key, subkey, '' as value from plato.Input1 + UNION ALL + SELECT 1 as key, '' as subkey, value from plato.Input2 + UNION ALL + SELECT CAST(key as Int32) as key, '' as subkey, value from plato.Input3 +) ORDER BY key, subkey, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/sorted.txt b/yql/essentials/tests/sql/suites/hor_join/sorted.txt new file mode 100644 index 0000000000..2a8e728cae --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/sorted.txt @@ -0,0 +1,4 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; diff --git a/yql/essentials/tests/sql/suites/hor_join/sorted.txt.attr b/yql/essentials/tests/sql/suites/hor_join/sorted.txt.attr new file mode 100644 index 0000000000..36f279e4a0 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/sorted.txt.attr @@ -0,0 +1,11 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"String"]]; + ["subkey";["DataType";"String"]]; + ["value";["DataType";"String"]]] + ]; + "SortDirections"=[1;1;]; + "SortedBy"=["key";"subkey";]; + "SortedByTypes"=[["DataType";"String";];["DataType";"String";];]; + "SortMembers"=["key";"subkey";]; +}} diff --git a/yql/essentials/tests/sql/suites/hor_join/sorted_out.cfg b/yql/essentials/tests/sql/suites/hor_join/sorted_out.cfg new file mode 100644 index 0000000000..186d94fcae --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/sorted_out.cfg @@ -0,0 +1,5 @@ +in Input1 sorted.txt +in Input2 input.txt +in Input3 input.txt +in Input4 input.txt +in Input5 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/sorted_out.sql b/yql/essentials/tests/sql/suites/hor_join/sorted_out.sql new file mode 100644 index 0000000000..d0a017508a --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/sorted_out.sql @@ -0,0 +1,17 @@ +/* postgres can not */ +/* kikimr can not */ +pragma yt.DisableOptimizers="UnorderedOuts"; + +SELECT * FROM ( + SELECT key, value || "a" as value FROM plato.Input1 + union all + SELECT key, "1" as value from plato.Input2 + union all + SELECT key, "2" as value from plato.Input3 + union all + SELECT key, "3" as value from plato.Input4 + union all + SELECT key, "4" as value from plato.Input5 +) AS x +ORDER BY key, value +;
\ No newline at end of file diff --git a/yql/essentials/tests/sql/suites/hor_join/sorted_out_mix.cfg b/yql/essentials/tests/sql/suites/hor_join/sorted_out_mix.cfg new file mode 100644 index 0000000000..8bc2f58496 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/sorted_out_mix.cfg @@ -0,0 +1,5 @@ +in Input1 input.txt +in Input2 sorted.txt +in Input3 input.txt +providers yt + diff --git a/yql/essentials/tests/sql/suites/hor_join/sorted_out_mix.sql b/yql/essentials/tests/sql/suites/hor_join/sorted_out_mix.sql new file mode 100644 index 0000000000..19aec3111b --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/sorted_out_mix.sql @@ -0,0 +1,18 @@ +/* syntax version 1 */ +/* postgres can not */ +/* kikimr can not */ +pragma yt.DisableOptimizers="UnorderedOuts"; + +$i1 = (SELECT key, value || "a" as value1 FROM plato.Input1); +$i2 = (SELECT key, "1" as value2 from plato.Input2); +$i3 = (SELECT key, "2" as value3 from plato.Input3); + +$udf = ($x) -> { + return AsStruct(Yql::Visit($x + , AsAtom("0"), ($i) -> { return Yql::Member($i, AsAtom("key")) } + , AsAtom("1"), ($i) -> { return Yql::Member($i, AsAtom("key")) } + , AsAtom("2"), ($i) -> { return Yql::Member($i, AsAtom("key")) } + ) AS key) +}; + +SELECT * FROM (PROCESS $i1, $i2, $i3 using $udf(TableRow())) ORDER BY key; diff --git a/yql/essentials/tests/sql/suites/hor_join/table_record.cfg b/yql/essentials/tests/sql/suites/hor_join/table_record.cfg new file mode 100644 index 0000000000..9b1926e035 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/table_record.cfg @@ -0,0 +1,2 @@ +in Input1 input.txt +in Input2 input.txt diff --git a/yql/essentials/tests/sql/suites/hor_join/table_record.sql b/yql/essentials/tests/sql/suites/hor_join/table_record.sql new file mode 100644 index 0000000000..af5f42281a --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/table_record.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +/* postgres can not */ +USE plato; + +SELECT * FROM ( + select key, TableRecordIndex() as record, TablePath() as path from Input1 + union all + select key, TableRecordIndex() as record, "d" as path from Input2 +) AS x +ORDER BY key, record, path +; diff --git a/yql/essentials/tests/sql/suites/hor_join/yamred_dsv.txt b/yql/essentials/tests/sql/suites/hor_join/yamred_dsv.txt new file mode 100644 index 0000000000..f2882a08fb --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yamred_dsv.txt @@ -0,0 +1,4 @@ +{"key"="3";"subkey"="s3";"a"="2";"b"="075";"c"="1";"d"="abc"}; +{"key"="1";"subkey"="s1";"a"="1";"b"="800";"c"="2";"d"="ddd"}; +{"key"="2";"subkey"="s2";"a"="4";"b"="020";"c"="3";"d"="q"}; +{"key"="5";"subkey"="s5";"a"="3";"b"="150";"c"="4";"d"="qzz"}; diff --git a/yql/essentials/tests/sql/suites/hor_join/yamred_dsv.txt.attr b/yql/essentials/tests/sql/suites/hor_join/yamred_dsv.txt.attr new file mode 100644 index 0000000000..f52f445c2c --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yamred_dsv.txt.attr @@ -0,0 +1,10 @@ +{ + "_format"=< + "fs"="\t"; + "rs"="\n"; + "has_subkey"=%true; + "escape_carriage_return"=%true; + "key_column_names"=["key"]; + "subkey_column_names"=["subkey"] + >"yamred_dsv"; +} diff --git a/yql/essentials/tests/sql/suites/hor_join/yield_off.sql b/yql/essentials/tests/sql/suites/hor_join/yield_off.sql new file mode 100644 index 0000000000..a1635ae3e2 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yield_off.sql @@ -0,0 +1,23 @@ +/* syntax version 1 */ +/* postgres can not */ +USE plato; +pragma config.flags("UdfSupportsYield","false"); + +$s = @@ +import yql + +def f(input, a): + for x in input: + yield x +@@; + +$f = Python::f(Callable<(Stream<Struct<key:String,subkey:String,value:String>>,Int32)->Stream<Struct<key:String,subkey:String,value:String>>>, $s); + + +SELECT * FROM ( + PROCESS Input using $f(TableRows(), 1) + UNION ALL + PROCESS Input using $f(TableRows(), 2) +) AS x +ORDER BY key, subkey, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/yield_on.sql b/yql/essentials/tests/sql/suites/hor_join/yield_on.sql new file mode 100644 index 0000000000..62efa85744 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yield_on.sql @@ -0,0 +1,20 @@ +/* syntax version 1 */ +/* postgres can not */ +USE plato; +pragma config.flags("UdfSupportsYield","true"); + +$s = @@ +def f(input, a): + for x in input: + yield x +@@; + +$f = Python::f(Callable<(Stream<Struct<key:String,subkey:String,value:String>>,Int32)->Stream<Struct<key:String,subkey:String,value:String>>>, $s); + +SELECT * FROM ( + PROCESS Input using $f(TableRows(), 1) + UNION ALL + PROCESS Input using $f(TableRows(), 2) +) AS x +ORDER BY key, subkey, value +; diff --git a/yql/essentials/tests/sql/suites/hor_join/yql-12610_old_table_props.cfg b/yql/essentials/tests/sql/suites/hor_join/yql-12610_old_table_props.cfg new file mode 100644 index 0000000000..1f1d2747db --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yql-12610_old_table_props.cfg @@ -0,0 +1,2 @@ +in Input input.txt +providers yt diff --git a/yql/essentials/tests/sql/suites/hor_join/yql-12610_old_table_props.sql b/yql/essentials/tests/sql/suites/hor_join/yql-12610_old_table_props.sql new file mode 100644 index 0000000000..3f36d0e6e3 --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yql-12610_old_table_props.sql @@ -0,0 +1,13 @@ +/* postgres can not */ +/* syntax version 1 */ +/* kikimr can not - yt pragma */ +USE plato; +pragma yt.UseSystemColumns="0"; + +SELECT * FROM ( + select key, TableRecordIndex() as record, TablePath() as path from Input + union all + select key, TableRecordIndex() as record, "d" as path from Input +) AS x +ORDER BY key, record, path +; diff --git a/yql/essentials/tests/sql/suites/hor_join/yql-6477_table_path.sql b/yql/essentials/tests/sql/suites/hor_join/yql-6477_table_path.sql new file mode 100644 index 0000000000..893cd776de --- /dev/null +++ b/yql/essentials/tests/sql/suites/hor_join/yql-6477_table_path.sql @@ -0,0 +1,18 @@ +/* postgres can not */ +/* syntax version 1 */ +/* kikimr can not - yt pragma */ +USE plato; + +pragma yt.MaxExtraJobMemoryToFuseOperations="512M"; + +$udfScript = @@ +def AsIs(s): + return s +@@; + +$udf = Python::AsIs(Callable<(String)->String>, $udfScript); + +select TablePath() as path from Input +union all +select TablePath() as path from Input where $udf(key) > "080" +; |