diff options
author | Igor Munkin <imunkin@ydb.tech> | 2024-04-12 15:19:15 +0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-12 15:19:15 +0500 |
commit | 9f940d68dd657ee2a04c742a439ade92707ad8ca (patch) | |
tree | ea748c63aff4f2d01044b2193cd27fb4e5e2042f | |
parent | 9a884130f53d0dc5fb887c073b763ad804dbb9a1 (diff) | |
download | ydb-9f940d68dd657ee2a04c742a439ade92707ad8ca.tar.gz |
YQL-18053: Introduce support for Struct type in block engine (#3516)
12 files changed, 174 insertions, 0 deletions
diff --git a/ydb/library/yql/minikql/computation/mkql_block_impl.cpp b/ydb/library/yql/minikql/computation/mkql_block_impl.cpp index 1cf86e7bc6e..da5cb8ec59c 100644 --- a/ydb/library/yql/minikql/computation/mkql_block_impl.cpp +++ b/ydb/library/yql/minikql/computation/mkql_block_impl.cpp @@ -55,6 +55,16 @@ arrow::Datum DoConvertScalar(TType* type, const T& value, arrow::MemoryPool& poo return arrow::Datum(std::make_shared<arrow::StructScalar>(arrowValue, arrowType)); } + if (type->IsStruct()) { + auto structType = AS_TYPE(TStructType, type); + std::vector<std::shared_ptr<arrow::Scalar>> arrowValue; + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + arrowValue.emplace_back(DoConvertScalar(structType->GetMemberType(i), value.GetElement(i), pool).scalar()); + } + + return arrow::Datum(std::make_shared<arrow::StructScalar>(arrowValue, arrowType)); + } + if (type->IsTuple()) { auto tupleType = AS_TYPE(TTupleType, type); std::vector<std::shared_ptr<arrow::Scalar>> arrowValue; diff --git a/ydb/library/yql/minikql/mkql_type_builder.cpp b/ydb/library/yql/minikql/mkql_type_builder.cpp index d0f4649c617..e863449ccc9 100644 --- a/ydb/library/yql/minikql/mkql_type_builder.cpp +++ b/ydb/library/yql/minikql/mkql_type_builder.cpp @@ -1496,6 +1496,23 @@ bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type) { return true; } + if (unpacked->IsStruct()) { + auto structType = AS_TYPE(TStructType, unpacked); + std::vector<std::shared_ptr<arrow::Field>> members; + for (ui32 i = 0; i < structType->GetMembersCount(); i++) { + std::shared_ptr<arrow::DataType> childType; + const TString memberName(structType->GetMemberName(i)); + auto memberType = structType->GetMemberType(i); + if (!ConvertArrowType(memberType, childType)) { + return false; + } + members.emplace_back(std::make_shared<arrow::Field>(memberName, childType, memberType->IsOptional())); + } + + type = std::make_shared<arrow::StructType>(members); + return true; + } + if (unpacked->IsTuple()) { auto tupleType = AS_TYPE(TTupleType, unpacked); std::vector<std::shared_ptr<arrow::Field>> fields; @@ -2331,6 +2348,15 @@ size_t CalcMaxBlockItemSize(const TType* type) { return CalcMaxBlockItemSize(AS_TYPE(TOptionalType, type)->GetItemType()); } + if (type->IsStruct()) { + auto structType = AS_TYPE(TStructType, type); + size_t result = 0; + for (ui32 i = 0; i < structType->GetMembersCount(); i++) { + result = std::max(result, CalcMaxBlockItemSize(structType->GetMemberType(i))); + } + return result; + } + if (type->IsTuple()) { auto tupleType = AS_TYPE(TTupleType, type); size_t result = 0; diff --git a/ydb/library/yql/public/udf/arrow/block_builder.h b/ydb/library/yql/public/udf/arrow/block_builder.h index 8e82b46cbcb..632fa576a69 100644 --- a/ydb/library/yql/public/udf/arrow/block_builder.h +++ b/ydb/library/yql/public/udf/arrow/block_builder.h @@ -1180,6 +1180,18 @@ inline std::unique_ptr<TArrayBuilderBase> MakeArrayBuilderImpl( type = typeOpt.GetItemType(); } + TStructTypeInspector typeStruct(typeInfoHelper, type); + if (typeStruct) { + TVector<std::unique_ptr<TArrayBuilderBase>> members; + for (ui32 i = 0; i < typeStruct.GetMembersCount(); i++) { + const TType* memberType = typeStruct.GetMemberType(i); + auto memberBuilder = MakeArrayBuilderBase(typeInfoHelper, memberType, pool, maxLen, pgBuilder); + members.push_back(std::move(memberBuilder)); + } + // XXX: Use Tuple array builder for Struct. + return std::make_unique<TTupleArrayBuilder<Nullable>>(typeInfoHelper, type, pool, maxLen, std::move(members)); + } + TTupleTypeInspector typeTuple(typeInfoHelper, type); if (typeTuple) { TVector<std::unique_ptr<TArrayBuilderBase>> children; diff --git a/ydb/library/yql/public/udf/arrow/block_reader.h b/ydb/library/yql/public/udf/arrow/block_reader.h index 3f6c958ddef..d42c9f16c7e 100644 --- a/ydb/library/yql/public/udf/arrow/block_reader.h +++ b/ydb/library/yql/public/udf/arrow/block_reader.h @@ -481,6 +481,16 @@ std::unique_ptr<typename TTraits::TResult> MakeBlockReaderImpl(const ITypeInfoHe type = unpacked; } + TStructTypeInspector typeStruct(typeInfoHelper, type); + if (typeStruct) { + TVector<std::unique_ptr<typename TTraits::TResult>> members; + for (ui32 i = 0; i < typeStruct.GetMembersCount(); i++) { + members.emplace_back(MakeBlockReaderImpl<TTraits>(typeInfoHelper, typeStruct.GetMemberType(i), pgBuilder)); + } + // XXX: Use Tuple block reader for Struct. + return MakeTupleBlockReaderImpl<TTraits>(isOptional, std::move(members)); + } + TTupleTypeInspector typeTuple(typeInfoHelper, type); if (typeTuple) { TVector<std::unique_ptr<typename TTraits::TResult>> children; @@ -572,6 +582,14 @@ inline void UpdateBlockItemSerializeProps(const ITypeInfoHelper& typeInfoHelper, type = typeOpt.GetItemType(); } + TStructTypeInspector typeStruct(typeInfoHelper, type); + if (typeStruct) { + for (ui32 i = 0; i < typeStruct.GetMembersCount(); ++i) { + UpdateBlockItemSerializeProps(typeInfoHelper, typeStruct.GetMemberType(i), props); + } + return; + } + TTupleTypeInspector typeTuple(typeInfoHelper, type); if (typeTuple) { for (ui32 i = 0; i < typeTuple.GetElementsCount(); ++i) { diff --git a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json index babe6af6d60..a7d13a5f47e 100644 --- a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json @@ -696,6 +696,28 @@ } ], "test.test[blocks-string_with--Results]": [], + "test.test[blocks-struct_type--Analyze]": [ + { + "checksum": "760588f4e3ad2eab5b27c89aa8fb7483", + "size": 6081, + "uri": "https://{canondata_backend}/1784826/ba008ae161ea4b06568a6c56c60fb71f4a81924a/resource.tar.gz#test.test_blocks-struct_type--Analyze_/plan.txt" + } + ], + "test.test[blocks-struct_type--Debug]": [ + { + "checksum": "cf235ebd78cc092e22825f8f9dd6f374", + "size": 2529, + "uri": "https://{canondata_backend}/1784826/ba008ae161ea4b06568a6c56c60fb71f4a81924a/resource.tar.gz#test.test_blocks-struct_type--Debug_/opt.yql_patched" + } + ], + "test.test[blocks-struct_type--Plan]": [ + { + "checksum": "760588f4e3ad2eab5b27c89aa8fb7483", + "size": 6081, + "uri": "https://{canondata_backend}/1784826/ba008ae161ea4b06568a6c56c60fb71f4a81924a/resource.tar.gz#test.test_blocks-struct_type--Plan_/plan.txt" + } + ], + "test.test[blocks-struct_type--Results]": [], "test.test[column_order-insert_with_reorder_cols--Analyze]": [ { "checksum": "60b979d2ae5fec69190a4ea74c2fdef4", diff --git a/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json index 4e09c6e0afd..fa54da497b4 100644 --- a/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json @@ -755,6 +755,20 @@ "uri": "https://{canondata_backend}/1942671/a6ef6234ecec8bdd9b5f7ec30206378c9f7268ef/resource.tar.gz#test.test_blocks-pg--Plan_/plan.txt" } ], + "test.test[blocks-struct_type--Debug]": [ + { + "checksum": "fb0a5fc2350560cf9a36dacbb4be566d", + "size": 5019, + "uri": "https://{canondata_backend}/1031349/13e5aab746c6cd5980248c22efa25f0a35339781/resource.tar.gz#test.test_blocks-struct_type--Debug_/opt.yql_patched" + } + ], + "test.test[blocks-struct_type--Plan]": [ + { + "checksum": "baf68896b5253a0ca389c06477f42ebd", + "size": 8875, + "uri": "https://{canondata_backend}/1031349/13e5aab746c6cd5980248c22efa25f0a35339781/resource.tar.gz#test.test_blocks-struct_type--Plan_/plan.txt" + } + ], "test.test[case-case_then_else-default.txt-Debug]": [ { "checksum": "3a7296b84e359596dbb10e9a84454532", diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index c39fe6dde09..e67583a3e44 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -3779,6 +3779,13 @@ "uri": "https://{canondata_backend}/1936997/00f46808be87e2ae2d4ac3ac45675b659c5ace45/resource.tar.gz#test_sql2yql.test_blocks-string_with_/sql.yql" } ], + "test_sql2yql.test[blocks-struct_type]": [ + { + "checksum": "bdba49991b760a6c8a80372e01974bb1", + "size": 1574, + "uri": "https://{canondata_backend}/1937424/54617edb3406f9afecca65147da35af20da61c56/resource.tar.gz#test_sql2yql.test_blocks-struct_type_/sql.yql" + } + ], "test_sql2yql.test[blocks-sub_uint64_opt2]": [ { "checksum": "4fce2aeba26b10fbfc172ffaa5e5f86d", @@ -22007,6 +22014,13 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_blocks-string_with_/formatted.sql" } ], + "test_sql_format.test[blocks-struct_type]": [ + { + "checksum": "ebbb03c893d23934cf9f061390184143", + "size": 199, + "uri": "https://{canondata_backend}/1937424/54617edb3406f9afecca65147da35af20da61c56/resource.tar.gz#test_sql_format.test_blocks-struct_type_/formatted.sql" + } + ], "test_sql_format.test[blocks-sub_uint64_opt2]": [ { "checksum": "312b28a171886b9d9b6dd6a46b5fa538", diff --git a/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt new file mode 100644 index 00000000000..1c423049466 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt @@ -0,0 +1,9 @@ +{"key"=1;"val"={"a"=11;"x"=1111;"o"=111;};}; +{"key"=2;"val"={"a"=22;"x"=2222;"o"=222;};}; +{"key"=3;"val"={"a"=33;"x"=3333;"o"=333;};}; +{"key"=4;"val"={"a"=44;"x"=4444;"o"=#;};}; +{"key"=5;"val"={"a"=55;"x"=5555;"o"=555;};}; +{"key"=6;"val"={"a"=66;"x"=6666;"o"=#;};}; +{"key"=7;"val"={"a"=77;"x"=7777;"o"=#;};}; +{"key"=8;"val"={"a"=88;"x"=8888;"o"=888;};}; +{"key"=9;"val"={"a"=99;"x"=9999;"o"=#;};}; diff --git a/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr new file mode 100644 index 00000000000..a28cc27044d --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/blocks/input_struct.txt.attr @@ -0,0 +1,10 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Int32"]]; + ["val";["StructType";[ + ["a";["DataType";"Int32"]]; + ["x";["DataType";"Int32"]]; + ["o";["OptionalType";["DataType";"Int32"]]]; + ]]]; + ]]; +}} diff --git a/ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg b/ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg new file mode 100644 index 00000000000..ed506aaf28d --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/blocks/struct_type.cfg @@ -0,0 +1 @@ +in Input input_struct.txt diff --git a/ydb/library/yql/tests/sql/suites/blocks/struct_type.sql b/ydb/library/yql/tests/sql/suites/blocks/struct_type.sql new file mode 100644 index 00000000000..8351872baea --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/blocks/struct_type.sql @@ -0,0 +1,10 @@ +USE plato; +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */ +pragma UseBlocks; + +SELECT + key, + SOME(val) as someVal, +FROM Input +GROUP BY key +ORDER BY key diff --git a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json index 851dc23f8b6..ba90d234db4 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json @@ -686,6 +686,34 @@ "uri": "https://{canondata_backend}/1923547/5154c8bd8ef9ead4f609771f831f20c15e795571/resource.tar.gz#test.test_blocks-string_with--Results_/results.txt" } ], + "test.test[blocks-struct_type--Debug]": [ + { + "checksum": "1e0c9e2b0888d18ce58aa1ddb8d91e84", + "size": 3378, + "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Debug_/opt.yql" + } + ], + "test.test[blocks-struct_type--Peephole]": [ + { + "checksum": "30369b25e3319f1f2ba30a264c8c4565", + "size": 3132, + "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Peephole_/opt.yql" + } + ], + "test.test[blocks-struct_type--Plan]": [ + { + "checksum": "377a2eef525225b2889f669dee538975", + "size": 7527, + "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Plan_/plan.txt" + } + ], + "test.test[blocks-struct_type--Results]": [ + { + "checksum": "6c0816041ea4529c72dc80617259cd0c", + "size": 4457, + "uri": "https://{canondata_backend}/937458/7df130b78c774e40ad55f2137fbe4f97e6e81f36/resource.tar.gz#test.test_blocks-struct_type--Results_/results.txt" + } + ], "test.test[column_order-insert_with_reorder_cols--Debug]": [ { "checksum": "7cf09c6ecfa17012411765a384799d3c", |