diff options
author | atarasov5 <[email protected]> | 2025-03-03 16:31:45 +0300 |
---|---|---|
committer | atarasov5 <[email protected]> | 2025-03-03 16:49:51 +0300 |
commit | 35c4c93230d28f35ca37296c064a1b8807146307 (patch) | |
tree | 42beb0c25393375f86a4e2af97a9d2db8427601b /yql/essentials/minikql/computation | |
parent | ca1e2aef23c33eb024704bdf3568f131a2763eaa (diff) |
YQL-18276: Block singular type implementation
commit_hash:1334e5c0fb5ff26fd40681059409f46cf49ec025
Diffstat (limited to 'yql/essentials/minikql/computation')
4 files changed, 100 insertions, 3 deletions
diff --git a/yql/essentials/minikql/computation/mkql_block_reader.cpp b/yql/essentials/minikql/computation/mkql_block_reader.cpp index 4e2060e7395..5886e121c40 100644 --- a/yql/essentials/minikql/computation/mkql_block_reader.cpp +++ b/yql/essentials/minikql/computation/mkql_block_reader.cpp @@ -162,6 +162,19 @@ private: i32 TypeLen = 0; }; +class TSingularTypeItemConverter: public IBlockItemConverter { +public: + NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final { + Y_UNUSED(item, holderFactory); + return NUdf::TUnboxedValuePod::Zero(); + } + + TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final { + Y_UNUSED(value); + return TBlockItem::Zero(); + } +}; + template <bool Nullable> class TTupleBlockItemConverter : public IBlockItemConverter { public: @@ -285,6 +298,7 @@ struct TConverterTraits { using TExtOptional = TExternalOptionalBlockItemConverter; template<typename TTzDate, bool Nullable> using TTzDateConverter = TTzDateBlockItemConverter<TTzDate, Nullable>; + using TSingularType = TSingularTypeItemConverter; constexpr static bool PassType = false; @@ -325,6 +339,10 @@ struct TConverterTraits { return std::make_unique<TTzDateConverter<TTzDate, false>>(); } } + + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } }; } // namespace diff --git a/yql/essentials/minikql/computation/mkql_block_transport.cpp b/yql/essentials/minikql/computation/mkql_block_transport.cpp index a03a5027e86..2a37245f9d2 100644 --- a/yql/essentials/minikql/computation/mkql_block_transport.cpp +++ b/yql/essentials/minikql/computation/mkql_block_transport.cpp @@ -429,6 +429,49 @@ private: const std::unique_ptr<TBlockDeserializerBase> Inner_; }; +class TSingularTypeBlockSerializer final: public IBlockSerializer { +private: + size_t ArrayMetadataCount() const final { + return 0; + } + + void StoreMetadata(const arrow::ArrayData& data, const IBlockSerializer::TMetadataSink& metaSink) const final { + Y_UNUSED(data, metaSink); + } + + void StoreArray(const arrow::ArrayData& data, TChunkedBuffer& dst) const final { + Y_UNUSED(data, dst); + } +}; + +class TSingularTypeBlockDeserializer final: public TBlockDeserializerBase { +private: + void DoLoadMetadata(const TMetadataSource& metaSource) final { + Y_UNUSED(metaSource); + } + + std::shared_ptr<arrow::ArrayData> DoMakeDefaultValue(const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) const final { + Y_UNUSED(offset); + Y_ENSURE(nullsCount == 0); + Y_ENSURE(!nulls || nulls->size() == 0); + return arrow::NullArray(blockLen).data(); + } + + std::shared_ptr<arrow::ArrayData> DoLoadArray(TChunkedBuffer& src, const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) final { + Y_UNUSED(offset, src); + Y_ENSURE(nullsCount == 0); + Y_ENSURE(!nulls || nulls->size() == 0); + return arrow::NullArray(blockLen).data(); + } + + bool IsNullable() const final { + return false; + } + + void DoResetMetadata() final { + } +}; + template<bool Nullable, typename TDerived> class TTupleBlockSerializerBase : public IBlockSerializer { size_t ArrayMetadataCount() const final { @@ -632,7 +675,7 @@ struct TSerializerTraits { using TExtOptional = TExtOptionalBlockSerializer; template<typename TTzDateType, bool Nullable> using TTzDate = TTzDateBlockSerializer<TTzDateType, Nullable>; - + using TSingularType = TSingularTypeBlockSerializer; constexpr static bool PassType = false; static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) { @@ -648,6 +691,10 @@ struct TSerializerTraits { ythrow yexception() << "Serializer not implemented for block resources"; } + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } + template<typename TTzDateType> static std::unique_ptr<TResult> MakeTzDate(bool isOptional) { if (isOptional) { @@ -670,6 +717,7 @@ struct TDeserializerTraits { using TExtOptional = TExtOptionalBlockDeserializer; template<typename TTzDateType, bool Nullable> using TTzDate = TTzDateBlockDeserializer<TTzDateType, Nullable>; + using TSingularType = TSingularTypeBlockDeserializer; constexpr static bool PassType = false; @@ -686,6 +734,10 @@ struct TDeserializerTraits { ythrow yexception() << "Deserializer not implemented for block resources"; } + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } + template<typename TTzDateType> static std::unique_ptr<TResult> MakeTzDate(bool isOptional) { if (isOptional) { diff --git a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp index b53a3890a4b..0b53f914525 100644 --- a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp +++ b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp @@ -98,6 +98,17 @@ public: } }; +class TSingularBlockTrimmer: public TBlockTrimmerBase { +public: + TSingularBlockTrimmer(arrow::MemoryPool* pool) + : TBlockTrimmerBase(pool) { + } + + std::shared_ptr<arrow::ArrayData> Trim(const std::shared_ptr<arrow::ArrayData>& array) override { + return array; + } +}; + template<typename TStringType, bool Nullable> class TStringBlockTrimmer : public TBlockTrimmerBase { using TOffset = typename TStringType::offset_type; @@ -217,6 +228,7 @@ struct TTrimmerTraits { using TResource = TResourceBlockTrimmer<Nullable>; template<typename TTzDate, bool Nullable> using TTzDateReader = TTzDateBlockTrimmer<TTzDate, Nullable>; + using TSingular = TSingularBlockTrimmer; constexpr static bool PassType = false; @@ -237,6 +249,10 @@ struct TTrimmerTraits { } } + static TResult::TPtr MakeSingular(arrow::MemoryPool* pool) { + return std::make_unique<TSingular>(pool); + } + template<typename TTzDate> static TResult::TPtr MakeTzDate(bool isOptional, arrow::MemoryPool* pool) { if (isOptional) { diff --git a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp index b689e4cf8b1..cbff1c5722d 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp @@ -674,6 +674,8 @@ protected: auto tzDateType = PgmBuilder.NewDataType(NUdf::EDataSlot::TzDate); auto blockTzDateType = PgmBuilder.NewBlockType(tzDateType, TBlockType::EShape::Many); + auto nullType = PgmBuilder.NewNullType(); + auto blockNullType = PgmBuilder.NewBlockType(nullType, TBlockType::EShape::Many); auto rowType = legacyStruct @@ -683,11 +685,12 @@ protected: {"_yql_block_length", scalarUi64Type}, {"a", scalarOptStrType}, {"b", blockOptTupleOptUi32StrType}, - {"c", blockTzDateType} + {"c", blockTzDateType}, + {"nill", blockNullType}, }) : PgmBuilder.NewMultiType( {blockUi32Type, blockOptStrType, scalarOptStrType, - blockOptTupleOptUi32StrType, blockTzDateType, scalarUi64Type}); + blockOptTupleOptUi32StrType, blockTzDateType, blockNullType, scalarUi64Type}); ui64 blockLen = 1000; UNIT_ASSERT_LE(offset + len, blockLen); @@ -696,6 +699,8 @@ protected: auto builder2 = MakeArrayBuilder(TTypeInfoHelper(), optStrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optStrType)), nullptr); auto builder3 = MakeArrayBuilder(TTypeInfoHelper(), optTupleOptUi32StrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optTupleOptUi32StrType)), nullptr); auto builder4 = MakeArrayBuilder(TTypeInfoHelper(), tzDateType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(tzDateType)), nullptr); + auto builder5 = MakeArrayBuilder(TTypeInfoHelper(), nullType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(nullType)), nullptr); + for (ui32 i = 0; i < blockLen; ++i) { TBlockItem b1(i); @@ -712,6 +717,7 @@ protected: TBlockItem tzDate {i}; tzDate.SetTimezoneId(i % 100); builder4->Add(tzDate); + builder5->Add(TBlockItem::Zero()); } std::string_view testScalarString = "foobar"; @@ -725,12 +731,14 @@ protected: datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf))); datums.emplace_back(builder3->Build(true)); datums.emplace_back(builder4->Build(true)); + datums.emplace_back(builder5->Build(true)); } else { datums.emplace_back(builder1->Build(true)); datums.emplace_back(builder2->Build(true)); datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf))); datums.emplace_back(builder3->Build(true)); datums.emplace_back(builder4->Build(true)); + datums.emplace_back(builder5->Build(true)); datums.emplace_back(arrow::Datum(std::make_shared<arrow::UInt64Scalar>(blockLen))); } @@ -785,6 +793,7 @@ protected: auto reader2 = MakeBlockReader(TTypeInfoHelper(), optStrType); auto reader3 = MakeBlockReader(TTypeInfoHelper(), optTupleOptUi32StrType); auto reader4 = MakeBlockReader(TTypeInfoHelper(), tzDateType); + auto reader5 = MakeBlockReader(TTypeInfoHelper(), nullType); for (ui32 i = offset; i < len; ++i) { TBlockItem b1 = reader1->GetItem(*TArrowBlock::From(unpackedColumns[0]).GetDatum().array(), i - offset); @@ -814,6 +823,8 @@ protected: TBlockItem b4 = reader4->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 5 : 4]).GetDatum().array(), i - offset); UNIT_ASSERT(b4.Get<ui16>() == i); UNIT_ASSERT(b4.GetTimezoneId() == (i % 100)); + TBlockItem b5 = reader5->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 6 : 5]).GetDatum().array(), i - offset); + UNIT_ASSERT(b5); } } } |