aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIgor Munkin <imunkin@ydb.tech>2024-04-12 15:19:15 +0500
committerGitHub <noreply@github.com>2024-04-12 15:19:15 +0500
commit9f940d68dd657ee2a04c742a439ade92707ad8ca (patch)
treeea748c63aff4f2d01044b2193cd27fb4e5e2042f
parent9a884130f53d0dc5fb887c073b763ad804dbb9a1 (diff)
downloadydb-9f940d68dd657ee2a04c742a439ade92707ad8ca.tar.gz
YQL-18053: Introduce support for Struct type in block engine (#3516)
-rw-r--r--ydb/library/yql/minikql/computation/mkql_block_impl.cpp10
-rw-r--r--ydb/library/yql/minikql/mkql_type_builder.cpp26
-rw-r--r--ydb/library/yql/public/udf/arrow/block_builder.h12
-rw-r--r--ydb/library/yql/public/udf/arrow/block_reader.h18
-rw-r--r--ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json22
-rw-r--r--ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json14
-rw-r--r--ydb/library/yql/tests/sql/sql2yql/canondata/result.json14
-rw-r--r--ydb/library/yql/tests/sql/suites/blocks/input_struct.txt9
-rw-r--r--ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr10
-rw-r--r--ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg1
-rw-r--r--ydb/library/yql/tests/sql/suites/blocks/struct_type.sql10
-rw-r--r--ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json28
12 files changed, 174 insertions, 0 deletions
diff --git a/ydb/library/yql/minikql/computation/mkql_block_impl.cpp b/ydb/library/yql/minikql/computation/mkql_block_impl.cpp
index 1cf86e7bc6e..da5cb8ec59c 100644
--- a/ydb/library/yql/minikql/computation/mkql_block_impl.cpp
+++ b/ydb/library/yql/minikql/computation/mkql_block_impl.cpp
@@ -55,6 +55,16 @@ arrow::Datum DoConvertScalar(TType* type, const T& value, arrow::MemoryPool& poo
return arrow::Datum(std::make_shared<arrow::StructScalar>(arrowValue, arrowType));
}
+ if (type->IsStruct()) {
+ auto structType = AS_TYPE(TStructType, type);
+ std::vector<std::shared_ptr<arrow::Scalar>> arrowValue;
+ for (ui32 i = 0; i < structType->GetMembersCount(); ++i) {
+ arrowValue.emplace_back(DoConvertScalar(structType->GetMemberType(i), value.GetElement(i), pool).scalar());
+ }
+
+ return arrow::Datum(std::make_shared<arrow::StructScalar>(arrowValue, arrowType));
+ }
+
if (type->IsTuple()) {
auto tupleType = AS_TYPE(TTupleType, type);
std::vector<std::shared_ptr<arrow::Scalar>> arrowValue;
diff --git a/ydb/library/yql/minikql/mkql_type_builder.cpp b/ydb/library/yql/minikql/mkql_type_builder.cpp
index d0f4649c617..e863449ccc9 100644
--- a/ydb/library/yql/minikql/mkql_type_builder.cpp
+++ b/ydb/library/yql/minikql/mkql_type_builder.cpp
@@ -1496,6 +1496,23 @@ bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type) {
return true;
}
+ if (unpacked->IsStruct()) {
+ auto structType = AS_TYPE(TStructType, unpacked);
+ std::vector<std::shared_ptr<arrow::Field>> members;
+ for (ui32 i = 0; i < structType->GetMembersCount(); i++) {
+ std::shared_ptr<arrow::DataType> childType;
+ const TString memberName(structType->GetMemberName(i));
+ auto memberType = structType->GetMemberType(i);
+ if (!ConvertArrowType(memberType, childType)) {
+ return false;
+ }
+ members.emplace_back(std::make_shared<arrow::Field>(memberName, childType, memberType->IsOptional()));
+ }
+
+ type = std::make_shared<arrow::StructType>(members);
+ return true;
+ }
+
if (unpacked->IsTuple()) {
auto tupleType = AS_TYPE(TTupleType, unpacked);
std::vector<std::shared_ptr<arrow::Field>> fields;
@@ -2331,6 +2348,15 @@ size_t CalcMaxBlockItemSize(const TType* type) {
return CalcMaxBlockItemSize(AS_TYPE(TOptionalType, type)->GetItemType());
}
+ if (type->IsStruct()) {
+ auto structType = AS_TYPE(TStructType, type);
+ size_t result = 0;
+ for (ui32 i = 0; i < structType->GetMembersCount(); i++) {
+ result = std::max(result, CalcMaxBlockItemSize(structType->GetMemberType(i)));
+ }
+ return result;
+ }
+
if (type->IsTuple()) {
auto tupleType = AS_TYPE(TTupleType, type);
size_t result = 0;
diff --git a/ydb/library/yql/public/udf/arrow/block_builder.h b/ydb/library/yql/public/udf/arrow/block_builder.h
index 8e82b46cbcb..632fa576a69 100644
--- a/ydb/library/yql/public/udf/arrow/block_builder.h
+++ b/ydb/library/yql/public/udf/arrow/block_builder.h
@@ -1180,6 +1180,18 @@ inline std::unique_ptr<TArrayBuilderBase> MakeArrayBuilderImpl(
type = typeOpt.GetItemType();
}
+ TStructTypeInspector typeStruct(typeInfoHelper, type);
+ if (typeStruct) {
+ TVector<std::unique_ptr<TArrayBuilderBase>> members;
+ for (ui32 i = 0; i < typeStruct.GetMembersCount(); i++) {
+ const TType* memberType = typeStruct.GetMemberType(i);
+ auto memberBuilder = MakeArrayBuilderBase(typeInfoHelper, memberType, pool, maxLen, pgBuilder);
+ members.push_back(std::move(memberBuilder));
+ }
+ // XXX: Use Tuple array builder for Struct.
+ return std::make_unique<TTupleArrayBuilder<Nullable>>(typeInfoHelper, type, pool, maxLen, std::move(members));
+ }
+
TTupleTypeInspector typeTuple(typeInfoHelper, type);
if (typeTuple) {
TVector<std::unique_ptr<TArrayBuilderBase>> children;
diff --git a/ydb/library/yql/public/udf/arrow/block_reader.h b/ydb/library/yql/public/udf/arrow/block_reader.h
index 3f6c958ddef..d42c9f16c7e 100644
--- a/ydb/library/yql/public/udf/arrow/block_reader.h
+++ b/ydb/library/yql/public/udf/arrow/block_reader.h
@@ -481,6 +481,16 @@ std::unique_ptr<typename TTraits::TResult> MakeBlockReaderImpl(const ITypeInfoHe
type = unpacked;
}
+ TStructTypeInspector typeStruct(typeInfoHelper, type);
+ if (typeStruct) {
+ TVector<std::unique_ptr<typename TTraits::TResult>> members;
+ for (ui32 i = 0; i < typeStruct.GetMembersCount(); i++) {
+ members.emplace_back(MakeBlockReaderImpl<TTraits>(typeInfoHelper, typeStruct.GetMemberType(i), pgBuilder));
+ }
+ // XXX: Use Tuple block reader for Struct.
+ return MakeTupleBlockReaderImpl<TTraits>(isOptional, std::move(members));
+ }
+
TTupleTypeInspector typeTuple(typeInfoHelper, type);
if (typeTuple) {
TVector<std::unique_ptr<typename TTraits::TResult>> children;
@@ -572,6 +582,14 @@ inline void UpdateBlockItemSerializeProps(const ITypeInfoHelper& typeInfoHelper,
type = typeOpt.GetItemType();
}
+ TStructTypeInspector typeStruct(typeInfoHelper, type);
+ if (typeStruct) {
+ for (ui32 i = 0; i < typeStruct.GetMembersCount(); ++i) {
+ UpdateBlockItemSerializeProps(typeInfoHelper, typeStruct.GetMemberType(i), props);
+ }
+ return;
+ }
+
TTupleTypeInspector typeTuple(typeInfoHelper, type);
if (typeTuple) {
for (ui32 i = 0; i < typeTuple.GetElementsCount(); ++i) {
diff --git a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json
index babe6af6d60..a7d13a5f47e 100644
--- a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json
+++ b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json
@@ -696,6 +696,28 @@
}
],
"test.test[blocks-string_with--Results]": [],
+ "test.test[blocks-struct_type--Analyze]": [
+ {
+ "checksum": "760588f4e3ad2eab5b27c89aa8fb7483",
+ "size": 6081,
+ "uri": "https://{canondata_backend}/1784826/ba008ae161ea4b06568a6c56c60fb71f4a81924a/resource.tar.gz#test.test_blocks-struct_type--Analyze_/plan.txt"
+ }
+ ],
+ "test.test[blocks-struct_type--Debug]": [
+ {
+ "checksum": "cf235ebd78cc092e22825f8f9dd6f374",
+ "size": 2529,
+ "uri": "https://{canondata_backend}/1784826/ba008ae161ea4b06568a6c56c60fb71f4a81924a/resource.tar.gz#test.test_blocks-struct_type--Debug_/opt.yql_patched"
+ }
+ ],
+ "test.test[blocks-struct_type--Plan]": [
+ {
+ "checksum": "760588f4e3ad2eab5b27c89aa8fb7483",
+ "size": 6081,
+ "uri": "https://{canondata_backend}/1784826/ba008ae161ea4b06568a6c56c60fb71f4a81924a/resource.tar.gz#test.test_blocks-struct_type--Plan_/plan.txt"
+ }
+ ],
+ "test.test[blocks-struct_type--Results]": [],
"test.test[column_order-insert_with_reorder_cols--Analyze]": [
{
"checksum": "60b979d2ae5fec69190a4ea74c2fdef4",
diff --git a/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json
index 4e09c6e0afd..fa54da497b4 100644
--- a/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json
+++ b/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json
@@ -755,6 +755,20 @@
"uri": "https://{canondata_backend}/1942671/a6ef6234ecec8bdd9b5f7ec30206378c9f7268ef/resource.tar.gz#test.test_blocks-pg--Plan_/plan.txt"
}
],
+ "test.test[blocks-struct_type--Debug]": [
+ {
+ "checksum": "fb0a5fc2350560cf9a36dacbb4be566d",
+ "size": 5019,
+ "uri": "https://{canondata_backend}/1031349/13e5aab746c6cd5980248c22efa25f0a35339781/resource.tar.gz#test.test_blocks-struct_type--Debug_/opt.yql_patched"
+ }
+ ],
+ "test.test[blocks-struct_type--Plan]": [
+ {
+ "checksum": "baf68896b5253a0ca389c06477f42ebd",
+ "size": 8875,
+ "uri": "https://{canondata_backend}/1031349/13e5aab746c6cd5980248c22efa25f0a35339781/resource.tar.gz#test.test_blocks-struct_type--Plan_/plan.txt"
+ }
+ ],
"test.test[case-case_then_else-default.txt-Debug]": [
{
"checksum": "3a7296b84e359596dbb10e9a84454532",
diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
index c39fe6dde09..e67583a3e44 100644
--- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
+++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json
@@ -3779,6 +3779,13 @@
"uri": "https://{canondata_backend}/1936997/00f46808be87e2ae2d4ac3ac45675b659c5ace45/resource.tar.gz#test_sql2yql.test_blocks-string_with_/sql.yql"
}
],
+ "test_sql2yql.test[blocks-struct_type]": [
+ {
+ "checksum": "bdba49991b760a6c8a80372e01974bb1",
+ "size": 1574,
+ "uri": "https://{canondata_backend}/1937424/54617edb3406f9afecca65147da35af20da61c56/resource.tar.gz#test_sql2yql.test_blocks-struct_type_/sql.yql"
+ }
+ ],
"test_sql2yql.test[blocks-sub_uint64_opt2]": [
{
"checksum": "4fce2aeba26b10fbfc172ffaa5e5f86d",
@@ -22007,6 +22014,13 @@
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_blocks-string_with_/formatted.sql"
}
],
+ "test_sql_format.test[blocks-struct_type]": [
+ {
+ "checksum": "ebbb03c893d23934cf9f061390184143",
+ "size": 199,
+ "uri": "https://{canondata_backend}/1937424/54617edb3406f9afecca65147da35af20da61c56/resource.tar.gz#test_sql_format.test_blocks-struct_type_/formatted.sql"
+ }
+ ],
"test_sql_format.test[blocks-sub_uint64_opt2]": [
{
"checksum": "312b28a171886b9d9b6dd6a46b5fa538",
diff --git a/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt
new file mode 100644
index 00000000000..1c423049466
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt
@@ -0,0 +1,9 @@
+{"key"=1;"val"={"a"=11;"x"=1111;"o"=111;};};
+{"key"=2;"val"={"a"=22;"x"=2222;"o"=222;};};
+{"key"=3;"val"={"a"=33;"x"=3333;"o"=333;};};
+{"key"=4;"val"={"a"=44;"x"=4444;"o"=#;};};
+{"key"=5;"val"={"a"=55;"x"=5555;"o"=555;};};
+{"key"=6;"val"={"a"=66;"x"=6666;"o"=#;};};
+{"key"=7;"val"={"a"=77;"x"=7777;"o"=#;};};
+{"key"=8;"val"={"a"=88;"x"=8888;"o"=888;};};
+{"key"=9;"val"={"a"=99;"x"=9999;"o"=#;};};
diff --git a/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr
new file mode 100644
index 00000000000..a28cc27044d
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr
@@ -0,0 +1,10 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"Int32"]];
+ ["val";["StructType";[
+ ["a";["DataType";"Int32"]];
+ ["x";["DataType";"Int32"]];
+ ["o";["OptionalType";["DataType";"Int32"]]];
+ ]]];
+ ]];
+}}
diff --git a/ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg b/ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg
new file mode 100644
index 00000000000..ed506aaf28d
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg
@@ -0,0 +1 @@
+in Input input_struct.txt
diff --git a/ydb/library/yql/tests/sql/suites/blocks/struct_type.sql b/ydb/library/yql/tests/sql/suites/blocks/struct_type.sql
new file mode 100644
index 00000000000..8351872baea
--- /dev/null
+++ b/ydb/library/yql/tests/sql/suites/blocks/struct_type.sql
@@ -0,0 +1,10 @@
+USE plato;
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+pragma UseBlocks;
+
+SELECT
+ key,
+ SOME(val) as someVal,
+FROM Input
+GROUP BY key
+ORDER BY key
diff --git a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json
index 851dc23f8b6..ba90d234db4 100644
--- a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json
+++ b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json
@@ -686,6 +686,34 @@
"uri": "https://{canondata_backend}/1923547/5154c8bd8ef9ead4f609771f831f20c15e795571/resource.tar.gz#test.test_blocks-string_with--Results_/results.txt"
}
],
+ "test.test[blocks-struct_type--Debug]": [
+ {
+ "checksum": "1e0c9e2b0888d18ce58aa1ddb8d91e84",
+ "size": 3378,
+ "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Debug_/opt.yql"
+ }
+ ],
+ "test.test[blocks-struct_type--Peephole]": [
+ {
+ "checksum": "30369b25e3319f1f2ba30a264c8c4565",
+ "size": 3132,
+ "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Peephole_/opt.yql"
+ }
+ ],
+ "test.test[blocks-struct_type--Plan]": [
+ {
+ "checksum": "377a2eef525225b2889f669dee538975",
+ "size": 7527,
+ "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Plan_/plan.txt"
+ }
+ ],
+ "test.test[blocks-struct_type--Results]": [
+ {
+ "checksum": "6c0816041ea4529c72dc80617259cd0c",
+ "size": 4457,
+ "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Results_/results.txt"
+ }
+ ],
"test.test[column_order-insert_with_reorder_cols--Debug]": [
{
"checksum": "7cf09c6ecfa17012411765a384799d3c",