aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/cyson/ut/test_control_attributes.py
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2023-08-30 20:49:53 +0300
committervvvv <vvvv@ydb.tech>2023-08-30 21:17:44 +0300
commitf154e22342f327342effe873b0a00ad80c975e76 (patch)
treefff231496c10fbfcff025ed953b512bf2a82d7c0 /library/python/cyson/ut/test_control_attributes.py
parent4ebafdd49d8b0706c5af76ef7c2d0b3b498d0310 (diff)
downloadydb-f154e22342f327342effe873b0a00ad80c975e76.tar.gz
Moved udf_test and refactored test_framework
Локально упавший тест выполняется %% vvvv@mr-nvme-testing-08:~/repo/arcadia/statbox/nile/tests/yql/py2/part_2$ arc checkout move_udf_test_and_refactor_tf Switched to branch 'move_udf_test_and_refactor_tf' vvvv@mr-nvme-testing-08:~/repo/arcadia/statbox/nile/tests/yql/py2/part_2$ ya make -tA -F '*test_unchanged_table*' Warn[-WPluginErr]: in $B/statbox/nile/tests/yql/py2/part_2/libpy2-part_2.so: Requirement cpu is redefined 2 -> 4 Warn[-WPluginErr]: in $B/statbox/nile/tests/yql/py2/part_2/libpy2-part_2.so: Requirement ram is redefined 16 -> 9 Number of suites skipped by name: 2, by filter *test_unchanged_table* Total 1 suite: 1 - GOOD Total 4 tests: 4 - GOOD Ok %% судя по ошибке он flaky
Diffstat (limited to 'library/python/cyson/ut/test_control_attributes.py')
-rw-r--r--library/python/cyson/ut/test_control_attributes.py258
1 files changed, 258 insertions, 0 deletions
diff --git a/library/python/cyson/ut/test_control_attributes.py b/library/python/cyson/ut/test_control_attributes.py
new file mode 100644
index 0000000000..221542b12d
--- /dev/null
+++ b/library/python/cyson/ut/test_control_attributes.py
@@ -0,0 +1,258 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import, division
+
+import itertools
+from functools import partial
+
+import pytest
+import six
+
+from cyson import (
+ YsonEntity, InputStream,
+ list_fragments, key_switched_list_fragments,
+ Reader, UnicodeReader
+)
+
+
def filter_control_records(records):
    """Return *records* without stream control records.

    Each element is a ``(range_index, row_index, value)`` tuple; entries
    whose value is a ``YsonEntity`` are control markers (row/range index
    attributes), not data rows, and are dropped.

    Note: the parameter was renamed from ``list`` (which shadowed the
    builtin); every caller in this file passes it positionally.
    """
    return [
        record for record in records
        if not isinstance(record[2], YsonEntity)
    ]
+
+
def canonize(val, as_unicode):
    """Recursively normalize *val* for comparison.

    With ``as_unicode=True`` byte strings are decoded as UTF-8; with
    ``as_unicode=False`` text strings are encoded to UTF-8 bytes.
    Tuples become lists, and dict keys/values are normalized too; any
    other value is returned unchanged.
    """
    recurse = partial(canonize, as_unicode=as_unicode)

    if as_unicode and isinstance(val, six.binary_type):
        return val.decode('utf8')
    if not as_unicode and isinstance(val, six.text_type):
        return val.encode('utf8')
    if isinstance(val, (list, tuple)):
        return list(map(recurse, val))
    if isinstance(val, dict):
        return dict((recurse(k), recurse(v)) for k, v in val.items())
    return val
+
+
@pytest.mark.parametrize(
    'reader, as_unicode', [
        [Reader, False],
        [UnicodeReader, True],
    ],
)
@pytest.mark.parametrize(
    'keep_control_records', [True, False]
)
def test_row_index(keep_control_records, reader, as_unicode):
    """row_index is tracked across <row_index=N> control records.

    The iterator must count rows from each control record's value and,
    depending on ``keep_control_records``, either yield or hide the
    control records themselves.
    """
    canon = partial(canonize, as_unicode=as_unicode)

    data = b"""
    <row_index=0>#;
    {a=1;b=2};
    {a=2;b=3};
    {a=3;b=4};
    <row_index=10000>#;
    {a=-1;b=-1};
    {a=-2;b=-2};
    """

    # `fragments` renamed from `iter`, which shadowed the builtin.
    fragments = list_fragments(
        stream=InputStream.from_string(data),
        Reader=reader,
        process_attributes=True,
        keep_control_records=keep_control_records,
    )
    # Snapshot the iterator's bookkeeping attributes at each step.
    records = [
        (fragments.range_index, fragments.row_index, record)
        for record in fragments
    ]

    # A control record is reported with row_index one less than the index
    # it announces (the following data row gets the announced index).
    etalon = [
        (None, -1, YsonEntity(attributes={b'row_index': 0})),
        (None, 0, canon({b'a': 1, b'b': 2})),
        (None, 1, canon({b'a': 2, b'b': 3})),
        (None, 2, canon({b'a': 3, b'b': 4})),
        (None, 9999, YsonEntity(attributes={b'row_index': 10000})),
        (None, 10000, canon({b'a': -1, b'b': -1})),
        (None, 10001, canon({b'a': -2, b'b': -2})),
    ]

    if not keep_control_records:
        etalon = filter_control_records(etalon)

    assert records == etalon
+
+
@pytest.mark.parametrize(
    'reader, as_unicode', [
        [Reader, False],
        [UnicodeReader, True],
    ]
)
@pytest.mark.parametrize(
    'keep_control_records', [True, False],
)
@pytest.mark.parametrize(
    'parameter_name',
    ['process_attributes', 'process_table_index']
)
def test_range_index(parameter_name, keep_control_records, reader, as_unicode):
    """range_index and row_index follow <range_index=N; row_index=M> records.

    Either ``process_attributes`` or ``process_table_index`` (chosen by
    ``parameter_name``) must enable control-record handling.
    """
    canon = partial(canonize, as_unicode=as_unicode)

    data = b"""
    <range_index=2; row_index=0>#;
    {a=1;b=2};
    {a=2;b=3};
    {a=3;b=4};
    <range_index=0; row_index=10000>#;
    {a=-1;b=-1};
    {a=-2;b=-2};
    """

    # `fragments` renamed from `iter`, which shadowed the builtin.
    fragments = list_fragments(
        stream=InputStream.from_string(data),
        Reader=reader,
        **{parameter_name: True, 'keep_control_records': keep_control_records}
    )
    records = [
        (fragments.range_index, fragments.row_index, record)
        for record in fragments
    ]

    # Control records carry the new range_index immediately, but a
    # row_index one less than the value they announce.
    etalon = [
        (2, -1, YsonEntity(attributes={b'range_index': 2, b'row_index': 0})),
        (2, 0, canon({b'a': 1, b'b': 2})),
        (2, 1, canon({b'a': 2, b'b': 3})),
        (2, 2, canon({b'a': 3, b'b': 4})),
        (0, 9999, YsonEntity(attributes={b'range_index': 0, b'row_index': 10000})),
        (0, 10000, canon({b'a': -1, b'b': -1})),
        (0, 10001, canon({b'a': -2, b'b': -2})),
    ]

    if not keep_control_records:
        etalon = filter_control_records(etalon)

    assert records == etalon
+
+
@pytest.mark.parametrize(
    'reader, as_unicode', [
        [Reader, False],
        [UnicodeReader, True],
    ]
)
def test_key_switch_first(reader, as_unicode):
    """Groups split on <key_switch=True>, including one before any data."""
    canon = partial(canonize, as_unicode=as_unicode)

    data = b"""
    <key_switch=True>#;
    {k=1;a=1;b=2};
    {k=1;a=2;b=3};
    {k=1;a=3;b=4};
    <key_switch=True>#;
    {k=2;a=-1;b=-1};
    {k=2;a=-2;b=-2};
    """

    # `groups` renamed from `iter`, which shadowed the builtin.
    groups = key_switched_list_fragments(
        stream=InputStream.from_string(data),
        Reader=reader,
    )
    records = [list(group) for group in groups]

    # A leading key_switch marker must not produce an empty first group.
    assert records == [
        [
            canon({b'k': 1, b'a': 1, b'b': 2}),
            canon({b'k': 1, b'a': 2, b'b': 3}),
            canon({b'k': 1, b'a': 3, b'b': 4}),
        ],
        [
            canon({b'k': 2, b'a': -1, b'b': -1}),
            canon({b'k': 2, b'a': -2, b'b': -2}),
        ]
    ]
+
+
@pytest.mark.parametrize(
    'reader, as_unicode', [
        [Reader, False],
        [UnicodeReader, True],
    ]
)
def test_key_switch_nofirst(reader, as_unicode):
    """Groups split on <key_switch=True> when data starts without a marker."""
    canon = partial(canonize, as_unicode=as_unicode)

    data = b"""
    {k=1;a=1;b=2};
    {k=1;a=2;b=3};
    {k=1;a=3;b=4};
    <key_switch=True>#;
    {k=2;a=-1;b=-1};
    {k=2;a=-2;b=-2};
    """

    # `groups` renamed from `iter`, which shadowed the builtin.
    groups = key_switched_list_fragments(
        stream=InputStream.from_string(data),
        Reader=reader
    )
    records = [list(group) for group in groups]

    # Rows before the first marker form an implicit first group.
    assert records == [
        [
            canon({b'k': 1, b'a': 1, b'b': 2}),
            canon({b'k': 1, b'a': 2, b'b': 3}),
            canon({b'k': 1, b'a': 3, b'b': 4}),
        ],
        [
            canon({b'k': 2, b'a': -1, b'b': -1}),
            canon({b'k': 2, b'a': -2, b'b': -2}),
        ]
    ]
+
+
@pytest.mark.parametrize(
    'reader, as_unicode', [
        [Reader, False],
        [UnicodeReader, True],
    ]
)
def test_key_switch_exhaust_unused_records(reader, as_unicode):
    """Advancing to the next group skips a group's unconsumed records.

    Only two records per group are read (via ``islice``); the iterator
    must silently exhaust the remainder of the current group before
    yielding the next one.
    """
    canon = partial(canonize, as_unicode=as_unicode)

    data = b"""
    {k=1;a=1;b=2};
    {k=1;a=2;b=3};
    {k=1;a=3;b=4};
    <key_switch=True>#;
    {k=2;a=-1;b=-1};
    {k=2;a=-2;b=-2};
    """

    # `groups` renamed from `iter`, which shadowed the builtin.
    groups = key_switched_list_fragments(
        stream=InputStream.from_string(data),
        Reader=reader,
    )

    records = []

    for group in groups:
        # Deliberately consume only a prefix of each group.
        records.append(
            list(itertools.islice(group, 2))
        )

    # The first group's third record ({k=1;a=3;b=4}) is skipped, not
    # leaked into the second group.
    assert records == [
        [
            canon({b'k': 1, b'a': 1, b'b': 2}),
            canon({b'k': 1, b'a': 2, b'b': 3}),
        ],
        [
            canon({b'k': 2, b'a': -1, b'b': -1}),
            canon({b'k': 2, b'a': -2, b'b': -2}),
        ]
    ]
+
+
@pytest.mark.parametrize('reader', [Reader, UnicodeReader])
def test_key_switch_empty(reader):
    """An empty input stream yields no key-switched groups at all."""
    # NOTE(review): sibling tests feed bytes to from_string; this one
    # passes a str literal — presumably both are accepted, but confirm.
    stream = InputStream.from_string("")
    groups = key_switched_list_fragments(
        stream=stream,
        Reader=reader,
    )
    assert list(groups) == []