summaryrefslogtreecommitdiffstats
path: root/yql/essentials/minikql/computation
diff options
context:
space:
mode:
authoratarasov5 <[email protected]>2025-03-03 16:31:45 +0300
committeratarasov5 <[email protected]>2025-03-03 16:49:51 +0300
commit35c4c93230d28f35ca37296c064a1b8807146307 (patch)
tree42beb0c25393375f86a4e2af97a9d2db8427601b /yql/essentials/minikql/computation
parentca1e2aef23c33eb024704bdf3568f131a2763eaa (diff)
YQL-18276: Block singular type implementation
commit_hash:1334e5c0fb5ff26fd40681059409f46cf49ec025
Diffstat (limited to 'yql/essentials/minikql/computation')
-rw-r--r--yql/essentials/minikql/computation/mkql_block_reader.cpp18
-rw-r--r--yql/essentials/minikql/computation/mkql_block_transport.cpp54
-rw-r--r--yql/essentials/minikql/computation/mkql_block_trimmer.cpp16
-rw-r--r--yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp15
4 files changed, 100 insertions, 3 deletions
diff --git a/yql/essentials/minikql/computation/mkql_block_reader.cpp b/yql/essentials/minikql/computation/mkql_block_reader.cpp
index 4e2060e7395..5886e121c40 100644
--- a/yql/essentials/minikql/computation/mkql_block_reader.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_reader.cpp
@@ -162,6 +162,19 @@ private:
i32 TypeLen = 0;
};
+class TSingularTypeItemConverter: public IBlockItemConverter {
+public:
+ NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
+ Y_UNUSED(item, holderFactory);
+ return NUdf::TUnboxedValuePod::Zero();
+ }
+
+ TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
+ Y_UNUSED(value);
+ return TBlockItem::Zero();
+ }
+};
+
template <bool Nullable>
class TTupleBlockItemConverter : public IBlockItemConverter {
public:
@@ -285,6 +298,7 @@ struct TConverterTraits {
using TExtOptional = TExternalOptionalBlockItemConverter;
template<typename TTzDate, bool Nullable>
using TTzDateConverter = TTzDateBlockItemConverter<TTzDate, Nullable>;
+ using TSingularType = TSingularTypeItemConverter;
constexpr static bool PassType = false;
@@ -325,6 +339,10 @@ struct TConverterTraits {
return std::make_unique<TTzDateConverter<TTzDate, false>>();
}
}
+
+ static std::unique_ptr<TResult> MakeSingular() {
+ return std::make_unique<TSingularType>();
+ }
};
} // namespace
diff --git a/yql/essentials/minikql/computation/mkql_block_transport.cpp b/yql/essentials/minikql/computation/mkql_block_transport.cpp
index a03a5027e86..2a37245f9d2 100644
--- a/yql/essentials/minikql/computation/mkql_block_transport.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_transport.cpp
@@ -429,6 +429,49 @@ private:
const std::unique_ptr<TBlockDeserializerBase> Inner_;
};
+class TSingularTypeBlockSerializer final: public IBlockSerializer {
+private:
+ size_t ArrayMetadataCount() const final {
+ return 0;
+ }
+
+ void StoreMetadata(const arrow::ArrayData& data, const IBlockSerializer::TMetadataSink& metaSink) const final {
+ Y_UNUSED(data, metaSink);
+ }
+
+ void StoreArray(const arrow::ArrayData& data, TChunkedBuffer& dst) const final {
+ Y_UNUSED(data, dst);
+ }
+};
+
+class TSingularTypeBlockDeserializer final: public TBlockDeserializerBase {
+private:
+ void DoLoadMetadata(const TMetadataSource& metaSource) final {
+ Y_UNUSED(metaSource);
+ }
+
+ std::shared_ptr<arrow::ArrayData> DoMakeDefaultValue(const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) const final {
+ Y_UNUSED(offset);
+ Y_ENSURE(nullsCount == 0);
+ Y_ENSURE(!nulls || nulls->size() == 0);
+ return arrow::NullArray(blockLen).data();
+ }
+
+ std::shared_ptr<arrow::ArrayData> DoLoadArray(TChunkedBuffer& src, const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) final {
+ Y_UNUSED(offset, src);
+ Y_ENSURE(nullsCount == 0);
+ Y_ENSURE(!nulls || nulls->size() == 0);
+ return arrow::NullArray(blockLen).data();
+ }
+
+ bool IsNullable() const final {
+ return false;
+ }
+
+ void DoResetMetadata() final {
+ }
+};
+
template<bool Nullable, typename TDerived>
class TTupleBlockSerializerBase : public IBlockSerializer {
size_t ArrayMetadataCount() const final {
@@ -632,7 +675,7 @@ struct TSerializerTraits {
using TExtOptional = TExtOptionalBlockSerializer;
template<typename TTzDateType, bool Nullable>
using TTzDate = TTzDateBlockSerializer<TTzDateType, Nullable>;
-
+ using TSingularType = TSingularTypeBlockSerializer;
constexpr static bool PassType = false;
static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) {
@@ -648,6 +691,10 @@ struct TSerializerTraits {
ythrow yexception() << "Serializer not implemented for block resources";
}
+ static std::unique_ptr<TResult> MakeSingular() {
+ return std::make_unique<TSingularType>();
+ }
+
template<typename TTzDateType>
static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
if (isOptional) {
@@ -670,6 +717,7 @@ struct TDeserializerTraits {
using TExtOptional = TExtOptionalBlockDeserializer;
template<typename TTzDateType, bool Nullable>
using TTzDate = TTzDateBlockDeserializer<TTzDateType, Nullable>;
+ using TSingularType = TSingularTypeBlockDeserializer;
constexpr static bool PassType = false;
@@ -686,6 +734,10 @@ struct TDeserializerTraits {
ythrow yexception() << "Deserializer not implemented for block resources";
}
+ static std::unique_ptr<TResult> MakeSingular() {
+ return std::make_unique<TSingularType>();
+ }
+
template<typename TTzDateType>
static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
if (isOptional) {
diff --git a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp
index b53a3890a4b..0b53f914525 100644
--- a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp
@@ -98,6 +98,17 @@ public:
}
};
+class TSingularBlockTrimmer: public TBlockTrimmerBase {
+public:
+ TSingularBlockTrimmer(arrow::MemoryPool* pool)
+ : TBlockTrimmerBase(pool) {
+ }
+
+ std::shared_ptr<arrow::ArrayData> Trim(const std::shared_ptr<arrow::ArrayData>& array) override {
+ return array;
+ }
+};
+
template<typename TStringType, bool Nullable>
class TStringBlockTrimmer : public TBlockTrimmerBase {
using TOffset = typename TStringType::offset_type;
@@ -217,6 +228,7 @@ struct TTrimmerTraits {
using TResource = TResourceBlockTrimmer<Nullable>;
template<typename TTzDate, bool Nullable>
using TTzDateReader = TTzDateBlockTrimmer<TTzDate, Nullable>;
+ using TSingular = TSingularBlockTrimmer;
constexpr static bool PassType = false;
@@ -237,6 +249,10 @@ struct TTrimmerTraits {
}
}
+ static TResult::TPtr MakeSingular(arrow::MemoryPool* pool) {
+ return std::make_unique<TSingular>(pool);
+ }
+
template<typename TTzDate>
static TResult::TPtr MakeTzDate(bool isOptional, arrow::MemoryPool* pool) {
if (isOptional) {
diff --git a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
index b689e4cf8b1..cbff1c5722d 100644
--- a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
+++ b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
@@ -674,6 +674,8 @@ protected:
auto tzDateType = PgmBuilder.NewDataType(NUdf::EDataSlot::TzDate);
auto blockTzDateType = PgmBuilder.NewBlockType(tzDateType, TBlockType::EShape::Many);
+ auto nullType = PgmBuilder.NewNullType();
+ auto blockNullType = PgmBuilder.NewBlockType(nullType, TBlockType::EShape::Many);
auto rowType =
legacyStruct
@@ -683,11 +685,12 @@ protected:
{"_yql_block_length", scalarUi64Type},
{"a", scalarOptStrType},
{"b", blockOptTupleOptUi32StrType},
- {"c", blockTzDateType}
+ {"c", blockTzDateType},
+ {"nill", blockNullType},
})
: PgmBuilder.NewMultiType(
{blockUi32Type, blockOptStrType, scalarOptStrType,
- blockOptTupleOptUi32StrType, blockTzDateType, scalarUi64Type});
+ blockOptTupleOptUi32StrType, blockTzDateType, blockNullType, scalarUi64Type});
ui64 blockLen = 1000;
UNIT_ASSERT_LE(offset + len, blockLen);
@@ -696,6 +699,8 @@ protected:
auto builder2 = MakeArrayBuilder(TTypeInfoHelper(), optStrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optStrType)), nullptr);
auto builder3 = MakeArrayBuilder(TTypeInfoHelper(), optTupleOptUi32StrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optTupleOptUi32StrType)), nullptr);
auto builder4 = MakeArrayBuilder(TTypeInfoHelper(), tzDateType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(tzDateType)), nullptr);
+ auto builder5 = MakeArrayBuilder(TTypeInfoHelper(), nullType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(nullType)), nullptr);
+
for (ui32 i = 0; i < blockLen; ++i) {
TBlockItem b1(i);
@@ -712,6 +717,7 @@ protected:
TBlockItem tzDate {i};
tzDate.SetTimezoneId(i % 100);
builder4->Add(tzDate);
+ builder5->Add(TBlockItem::Zero());
}
std::string_view testScalarString = "foobar";
@@ -725,12 +731,14 @@ protected:
datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf)));
datums.emplace_back(builder3->Build(true));
datums.emplace_back(builder4->Build(true));
+ datums.emplace_back(builder5->Build(true));
} else {
datums.emplace_back(builder1->Build(true));
datums.emplace_back(builder2->Build(true));
datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf)));
datums.emplace_back(builder3->Build(true));
datums.emplace_back(builder4->Build(true));
+ datums.emplace_back(builder5->Build(true));
datums.emplace_back(arrow::Datum(std::make_shared<arrow::UInt64Scalar>(blockLen)));
}
@@ -785,6 +793,7 @@ protected:
auto reader2 = MakeBlockReader(TTypeInfoHelper(), optStrType);
auto reader3 = MakeBlockReader(TTypeInfoHelper(), optTupleOptUi32StrType);
auto reader4 = MakeBlockReader(TTypeInfoHelper(), tzDateType);
+ auto reader5 = MakeBlockReader(TTypeInfoHelper(), nullType);
for (ui32 i = offset; i < len; ++i) {
TBlockItem b1 = reader1->GetItem(*TArrowBlock::From(unpackedColumns[0]).GetDatum().array(), i - offset);
@@ -814,6 +823,8 @@ protected:
TBlockItem b4 = reader4->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 5 : 4]).GetDatum().array(), i - offset);
UNIT_ASSERT(b4.Get<ui16>() == i);
UNIT_ASSERT(b4.GetTimezoneId() == (i % 100));
+ TBlockItem b5 = reader5->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 6 : 5]).GetDatum().array(), i - offset);
+ UNIT_ASSERT(b5);
}
}
}