about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author uzhas <uzhas@ydb.tech> 2023-11-16 20:59:28 +0300
committer uzhas <uzhas@ydb.tech> 2023-11-16 21:21:04 +0300
commit 326d86765643437e4986dc3a1318f9d975848415 (patch)
tree 5d9cfdc41e218b098f4cda514af278da909bef32
parent e41da361751eec0b148c9bd85a4b6a1a862e5da5 (diff)
download ydb-326d86765643437e4986dc3a1318f9d975848415.tar.gz
add fq compression tests
-rw-r--r-- ydb/tests/fq/s3/test_compression_data/test.json.br bin 0 -> 66 bytes
-rw-r--r-- ydb/tests/fq/s3/test_compression_data/test.json.bz2 bin 0 -> 92 bytes
-rw-r--r-- ydb/tests/fq/s3/test_compression_data/test.json.gz bin 0 -> 81 bytes
-rw-r--r-- ydb/tests/fq/s3/test_compression_data/test.json.lz4 bin 0 -> 79 bytes
-rw-r--r-- ydb/tests/fq/s3/test_compression_data/test.json.xz bin 0 -> 116 bytes
-rw-r--r-- ydb/tests/fq/s3/test_compression_data/test.json.zst bin 0 -> 71 bytes
-rw-r--r-- ydb/tests/fq/s3/test_compressions.py 109
-rw-r--r-- ydb/tests/fq/s3/ya.make 1
8 files changed, 110 insertions, 0 deletions
diff --git a/ydb/tests/fq/s3/test_compression_data/test.json.br b/ydb/tests/fq/s3/test_compression_data/test.json.br
new file mode 100644
index 0000000000..ad9f8da80b
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compression_data/test.json.br
Binary files differ
diff --git a/ydb/tests/fq/s3/test_compression_data/test.json.bz2 b/ydb/tests/fq/s3/test_compression_data/test.json.bz2
new file mode 100644
index 0000000000..f5b217efb4
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compression_data/test.json.bz2
Binary files differ
diff --git a/ydb/tests/fq/s3/test_compression_data/test.json.gz b/ydb/tests/fq/s3/test_compression_data/test.json.gz
new file mode 100644
index 0000000000..ebd73dc216
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compression_data/test.json.gz
Binary files differ
diff --git a/ydb/tests/fq/s3/test_compression_data/test.json.lz4 b/ydb/tests/fq/s3/test_compression_data/test.json.lz4
new file mode 100644
index 0000000000..f78d6a71ee
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compression_data/test.json.lz4
Binary files differ
diff --git a/ydb/tests/fq/s3/test_compression_data/test.json.xz b/ydb/tests/fq/s3/test_compression_data/test.json.xz
new file mode 100644
index 0000000000..588fb1dd01
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compression_data/test.json.xz
Binary files differ
diff --git a/ydb/tests/fq/s3/test_compression_data/test.json.zst b/ydb/tests/fq/s3/test_compression_data/test.json.zst
new file mode 100644
index 0000000000..3f0bf6862f
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compression_data/test.json.zst
Binary files differ
diff --git a/ydb/tests/fq/s3/test_compressions.py b/ydb/tests/fq/s3/test_compressions.py
new file mode 100644
index 0000000000..cecb71c093
--- /dev/null
+++ b/ydb/tests/fq/s3/test_compressions.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import boto3
+import logging
+
+import pytest
+
+import ydb.public.api.protos.ydb_value_pb2 as ydb
+import ydb.public.api.protos.draft.fq_pb2 as fq
+
+import ydb.tests.fq.s3.s3_helpers as s3_helpers
+from ydb.tests.tools.fq_runner.kikimr_utils import yq_all
+
+
+class TestS3Compressions:  # FQ tests that read S3 objects with an explicit compression= setting
+    def create_bucket_and_upload_file(self, filename, s3, kikimr):  # delegates to s3_helpers (bucket "fbucket"), then waits for bootstrap
+        s3_helpers.create_bucket_and_upload_file(filename, s3.s3_url, "fbucket", "ydb/tests/fq/s3/test_compression_data")
+        kikimr.control_plane.wait_bootstrap(1)
+
+    def validate_result(self, result_set):  # expects exactly one row: description=b"yq", id=0, info=b"abc"
+        logging.debug("%s", result_set)
+        assert len(result_set.columns) == 3
+        assert result_set.columns[0].name == "description"  # columns are asserted as description, id, info - not the SCHEMA declaration order
+        assert result_set.columns[0].type.type_id == ydb.Type.STRING
+        assert result_set.columns[1].name == "id"
+        assert result_set.columns[1].type.type_id == ydb.Type.INT32
+        assert result_set.columns[2].name == "info"
+        assert result_set.columns[2].type.type_id == ydb.Type.STRING
+        assert len(result_set.rows) == 1
+        assert result_set.rows[0].items[0].bytes_value == b"yq"
+        assert result_set.rows[0].items[1].int32_value == 0
+        assert result_set.rows[0].items[2].bytes_value == b"abc"
+
+    @yq_all
+    @pytest.mark.parametrize("filename, compression", [  # (fixture file, value substituted into compression="...")
+        ("test.json.gz", "gzip"),
+        ("test.json.lz4", "lz4"),
+        ("test.json.br", "brotli"),
+        ("test.json.bz2", "bzip2"),
+        ("test.json.zst", "zstd"),
+        ("test.json.xz", "xz")
+    ])
+    def test_compression(self, kikimr, s3, client, filename, compression):  # every case must complete and pass validate_result
+        self.create_bucket_and_upload_file(filename, s3, kikimr)
+        client.create_storage_connection("fruitbucket", "fbucket")
+
+        sql = '''
+ SELECT *
+ FROM fruitbucket.`{}`
+ WITH (format=json_each_row, compression="{}", SCHEMA (
+ id Int32 NOT NULL,
+ description String NOT NULL,
+ info String NOT NULL
+ ));
+ '''.format(filename, compression)
+
+        query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id
+        client.wait_query_status(query_id, fq.QueryMeta.COMPLETED)
+
+        data = client.get_result_data(query_id)
+        result_set = data.result.result_set
+        self.validate_result(result_set)
+
+    @yq_all
+    @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True)  # indirect: the dict is handed to the "client" fixture
+    def test_invalid_compression(self, kikimr, s3, client):  # an unknown compression name must fail with a descriptive error
+        resource = boto3.resource(
+            "s3",
+            endpoint_url=s3.s3_url,
+            aws_access_key_id="key",
+            aws_secret_access_key="secret_key"
+        )
+
+        bucket = resource.Bucket("fbucket")
+        bucket.create(ACL='public-read')
+
+        s3_client = boto3.client(
+            "s3",
+            endpoint_url=s3.s3_url,
+            aws_access_key_id="key",
+            aws_secret_access_key="secret_key"
+        )
+
+        fruits = R'''Fruit,Price,Weight
+Banana,3,100
+Apple,2,22
+Pear,15,33'''
+        s3_client.put_object(Body=fruits, Bucket='fbucket', Key='fruits.csv', ContentType='text/plain')
+        kikimr.control_plane.wait_bootstrap(1)
+
+        client.create_storage_connection("fruitbucket", "fbucket")
+
+        sql = R'''
+ SELECT *
+ FROM fruitbucket.`fruits.csv`
+ WITH (format=csv_with_names, compression="some_compression", SCHEMA (
+ Fruit String,
+ Price Int,
+ Weight Int
+ ));
+ '''
+
+        query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id
+        client.wait_query_status(query_id, fq.QueryMeta.FAILED)  # the query is expected to end in FAILED, not COMPLETED
+        describe_result = client.describe_query(query_id).result
+        logging.debug("Describe result: %s", describe_result)
+        describe_string = str(describe_result)
+        assert "Unknown compression: some_compression. Use one of: gzip, zstd, lz4, brotli, bzip2, xz" in describe_string
diff --git a/ydb/tests/fq/s3/ya.make b/ydb/tests/fq/s3/ya.make
index 8bf632c446..3c679fd81e 100644
--- a/ydb/tests/fq/s3/ya.make
+++ b/ydb/tests/fq/s3/ya.make
@@ -21,6 +21,7 @@ DEPENDS(
TEST_SRCS(
test_bindings.py
+ test_compressions.py
test_early_finish.py
test_explicit_partitioning.py
test_format_setting.py