diff options
author | atarasov5 <[email protected]> | 2025-03-03 16:31:45 +0300 |
---|---|---|
committer | atarasov5 <[email protected]> | 2025-03-03 16:49:51 +0300 |
commit | 35c4c93230d28f35ca37296c064a1b8807146307 (patch) | |
tree | 42beb0c25393375f86a4e2af97a9d2db8427601b /yql/essentials | |
parent | ca1e2aef23c33eb024704bdf3568f131a2763eaa (diff) |
YQL-18276: Block singular type implementation
commit_hash:1334e5c0fb5ff26fd40681059409f46cf49ec025
Diffstat (limited to 'yql/essentials')
26 files changed, 786 insertions, 14 deletions
diff --git a/yql/essentials/minikql/computation/mkql_block_reader.cpp b/yql/essentials/minikql/computation/mkql_block_reader.cpp index 4e2060e7395..5886e121c40 100644 --- a/yql/essentials/minikql/computation/mkql_block_reader.cpp +++ b/yql/essentials/minikql/computation/mkql_block_reader.cpp @@ -162,6 +162,19 @@ private: i32 TypeLen = 0; }; +class TSingularTypeItemConverter: public IBlockItemConverter { +public: + NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final { + Y_UNUSED(item, holderFactory); + return NUdf::TUnboxedValuePod::Zero(); + } + + TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final { + Y_UNUSED(value); + return TBlockItem::Zero(); + } +}; + template <bool Nullable> class TTupleBlockItemConverter : public IBlockItemConverter { public: @@ -285,6 +298,7 @@ struct TConverterTraits { using TExtOptional = TExternalOptionalBlockItemConverter; template<typename TTzDate, bool Nullable> using TTzDateConverter = TTzDateBlockItemConverter<TTzDate, Nullable>; + using TSingularType = TSingularTypeItemConverter; constexpr static bool PassType = false; @@ -325,6 +339,10 @@ struct TConverterTraits { return std::make_unique<TTzDateConverter<TTzDate, false>>(); } } + + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } }; } // namespace diff --git a/yql/essentials/minikql/computation/mkql_block_transport.cpp b/yql/essentials/minikql/computation/mkql_block_transport.cpp index a03a5027e86..2a37245f9d2 100644 --- a/yql/essentials/minikql/computation/mkql_block_transport.cpp +++ b/yql/essentials/minikql/computation/mkql_block_transport.cpp @@ -429,6 +429,49 @@ private: const std::unique_ptr<TBlockDeserializerBase> Inner_; }; +class TSingularTypeBlockSerializer final: public IBlockSerializer { +private: + size_t ArrayMetadataCount() const final { + return 0; + } + + void StoreMetadata(const arrow::ArrayData& data, const IBlockSerializer::TMetadataSink& metaSink) const final { + Y_UNUSED(data, metaSink); + } + + void StoreArray(const arrow::ArrayData& data, TChunkedBuffer& dst) const final { + Y_UNUSED(data, dst); + } +}; + +class TSingularTypeBlockDeserializer final: public TBlockDeserializerBase { +private: + void DoLoadMetadata(const TMetadataSource& metaSource) final { + Y_UNUSED(metaSource); + } + + std::shared_ptr<arrow::ArrayData> DoMakeDefaultValue(const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) const final { + Y_UNUSED(offset); + Y_ENSURE(nullsCount == 0); + Y_ENSURE(!nulls || nulls->size() == 0); + return arrow::NullArray(blockLen).data(); + } + + std::shared_ptr<arrow::ArrayData> DoLoadArray(TChunkedBuffer& src, const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) final { + Y_UNUSED(offset, src); + Y_ENSURE(nullsCount == 0); + Y_ENSURE(!nulls || nulls->size() == 0); + return arrow::NullArray(blockLen).data(); + } + + bool IsNullable() const final { + return false; + } + + void DoResetMetadata() final { + } +}; + template<bool Nullable, typename TDerived> class TTupleBlockSerializerBase : public IBlockSerializer { size_t ArrayMetadataCount() const final { @@ -632,7 +675,7 @@ struct TSerializerTraits { using TExtOptional = TExtOptionalBlockSerializer; template<typename TTzDateType, bool Nullable> using TTzDate = TTzDateBlockSerializer<TTzDateType, Nullable>; - + using TSingularType = TSingularTypeBlockSerializer; constexpr static bool PassType = false; static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) { @@ -648,6 +691,10 @@ struct TSerializerTraits { ythrow yexception() << "Serializer not implemented for block resources"; } + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } + template<typename TTzDateType> static std::unique_ptr<TResult> MakeTzDate(bool isOptional) { if (isOptional) { @@ -670,6 +717,7 @@ struct TDeserializerTraits { using TExtOptional = TExtOptionalBlockDeserializer; template<typename TTzDateType, bool Nullable> using TTzDate = TTzDateBlockDeserializer<TTzDateType, Nullable>; + using TSingularType = TSingularTypeBlockDeserializer; constexpr static bool PassType = false; @@ -686,6 +734,10 @@ struct TDeserializerTraits { ythrow yexception() << "Deserializer not implemented for block resources"; } + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } + template<typename TTzDateType> static std::unique_ptr<TResult> MakeTzDate(bool isOptional) { if (isOptional) { diff --git a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp index b53a3890a4b..0b53f914525 100644 --- a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp +++ b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp @@ -98,6 +98,17 @@ public: } }; +class TSingularBlockTrimmer: public TBlockTrimmerBase { +public: + TSingularBlockTrimmer(arrow::MemoryPool* pool) + : TBlockTrimmerBase(pool) { + } + + std::shared_ptr<arrow::ArrayData> Trim(const std::shared_ptr<arrow::ArrayData>& array) override { + return array; + } +}; + template<typename TStringType, bool Nullable> class TStringBlockTrimmer : public TBlockTrimmerBase { using TOffset = typename TStringType::offset_type; @@ -217,6 +228,7 @@ struct TTrimmerTraits { using TResource = TResourceBlockTrimmer<Nullable>; template<typename TTzDate, bool Nullable> using TTzDateReader = TTzDateBlockTrimmer<TTzDate, Nullable>; + using TSingular = TSingularBlockTrimmer; constexpr static bool PassType = false; @@ -237,6 +249,10 @@ struct TTrimmerTraits { } } + static TResult::TPtr MakeSingular(arrow::MemoryPool* pool) { + return std::make_unique<TSingular>(pool); + } + template<typename TTzDate> static TResult::TPtr MakeTzDate(bool isOptional, arrow::MemoryPool* pool) { if (isOptional) { diff --git a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp index b689e4cf8b1..cbff1c5722d 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp @@ -674,6 +674,8 @@ protected: auto tzDateType = PgmBuilder.NewDataType(NUdf::EDataSlot::TzDate); auto blockTzDateType = PgmBuilder.NewBlockType(tzDateType, TBlockType::EShape::Many); + auto nullType = PgmBuilder.NewNullType(); + auto blockNullType = PgmBuilder.NewBlockType(nullType, TBlockType::EShape::Many); auto rowType = legacyStruct @@ -683,11 +685,12 @@ protected: {"_yql_block_length", scalarUi64Type}, {"a", scalarOptStrType}, {"b", blockOptTupleOptUi32StrType}, - {"c", blockTzDateType} + {"c", blockTzDateType}, + {"nill", blockNullType}, }) : PgmBuilder.NewMultiType( {blockUi32Type, blockOptStrType, scalarOptStrType, - blockOptTupleOptUi32StrType, blockTzDateType, scalarUi64Type}); + blockOptTupleOptUi32StrType, blockTzDateType, blockNullType, scalarUi64Type}); ui64 blockLen = 1000; UNIT_ASSERT_LE(offset + len, blockLen); @@ -696,6 +699,8 @@ protected: auto builder2 = MakeArrayBuilder(TTypeInfoHelper(), optStrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optStrType)), nullptr); auto builder3 = MakeArrayBuilder(TTypeInfoHelper(), optTupleOptUi32StrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optTupleOptUi32StrType)), nullptr); auto builder4 = MakeArrayBuilder(TTypeInfoHelper(), tzDateType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(tzDateType)), nullptr); + auto builder5 = MakeArrayBuilder(TTypeInfoHelper(), nullType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(nullType)), nullptr); + for (ui32 i = 0; i < blockLen; ++i) { TBlockItem b1(i); @@ -712,6 +717,7 @@ protected: TBlockItem tzDate {i}; tzDate.SetTimezoneId(i % 100); builder4->Add(tzDate); + builder5->Add(TBlockItem::Zero()); } std::string_view testScalarString = "foobar"; @@ -725,12 +731,14 @@ protected: datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf))); datums.emplace_back(builder3->Build(true)); datums.emplace_back(builder4->Build(true)); + datums.emplace_back(builder5->Build(true)); } else { datums.emplace_back(builder1->Build(true)); datums.emplace_back(builder2->Build(true)); datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf))); datums.emplace_back(builder3->Build(true)); datums.emplace_back(builder4->Build(true)); + datums.emplace_back(builder5->Build(true)); datums.emplace_back(arrow::Datum(std::make_shared<arrow::UInt64Scalar>(blockLen))); } @@ -785,6 +793,7 @@ protected: auto reader2 = MakeBlockReader(TTypeInfoHelper(), optStrType); auto reader3 = MakeBlockReader(TTypeInfoHelper(), optTupleOptUi32StrType); auto reader4 = MakeBlockReader(TTypeInfoHelper(), tzDateType); + auto reader5 = MakeBlockReader(TTypeInfoHelper(), nullType); for (ui32 i = offset; i < len; ++i) { TBlockItem b1 = reader1->GetItem(*TArrowBlock::From(unpackedColumns[0]).GetDatum().array(), i - offset); @@ -814,6 +823,8 @@ protected: TBlockItem b4 = reader4->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 5 : 4]).GetDatum().array(), i - offset); UNIT_ASSERT(b4.Get<ui16>() == i); UNIT_ASSERT(b4.GetTimezoneId() == (i % 100)); + TBlockItem b5 = reader5->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 6 : 5]).GetDatum().array(), i - offset); + UNIT_ASSERT(b5); } } } diff --git a/yql/essentials/minikql/mkql_type_builder.cpp b/yql/essentials/minikql/mkql_type_builder.cpp index d1df31a97d8..dfbdd3f89c0 100644 --- a/yql/essentials/minikql/mkql_type_builder.cpp +++ b/yql/essentials/minikql/mkql_type_builder.cpp @@ -1522,6 +1522,17 @@ bool ConvertArrowTypeImpl(NUdf::EDataSlot slot, std::shared_ptr<arrow::DataType> } } +inline bool IsSingularType(const TType* type) { + return type->IsNull() || + type->IsVoid() || + type->IsEmptyDict() || + type->IsEmptyList(); +} + +inline bool NeedWrapWithExternalOptional(const TType* type) { + return type->IsPg() || IsSingularType(type); +} + bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& type, const TArrowConvertFailedCallback& onFail, bool output) { bool isOptional; auto unpacked = UnpackOptional(itemType, isOptional); @@ -1534,8 +1545,7 @@ bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& typ return false; } - if (unpacked->IsOptional() || isOptional && unpacked->IsPg()) { - // at least 2 levels of optionals + if (unpacked->IsOptional() || isOptional && NeedWrapWithExternalOptional(unpacked)) { ui32 nestLevel = 0; auto currentType = itemType; auto previousType = itemType; @@ -1545,12 +1555,11 @@ bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& typ currentType = AS_TYPE(TOptionalType, currentType)->GetItemType(); } while (currentType->IsOptional()); - if (currentType->IsPg()) { + if (NeedWrapWithExternalOptional(currentType)) { previousType = currentType; ++nestLevel; } - // previousType is always Optional std::shared_ptr<arrow::DataType> innerArrowType; if (!ConvertArrowTypeImpl(previousType, innerArrowType, onFail, output)) { return false; @@ -1618,6 +1627,11 @@ bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& typ return true; } + if (IsSingularType(unpacked)) { + type = arrow::null(); + return true; + } + if (!unpacked->IsData()) { if (onFail) { onFail(unpacked); @@ -2479,6 +2493,10 @@ size_t CalcMaxBlockItemSize(const TType* type) { return sizeof(NYql::NUdf::TUnboxedValue); } + if (IsSingularType(type)) { + return 0; + } + if (type->IsData()) { auto slot = *AS_TYPE(TDataType, type)->GetDataSlot(); switch (slot) { @@ -2552,6 +2570,7 @@ struct TComparatorTraits { using TExtOptional = NUdf::TExternalOptionalBlockItemComparator; template <typename T, bool Nullable> using TTzDateComparator = NUdf::TTzDateBlockItemComparator<T, Nullable>; + using TSingularType = NUdf::TSingularTypeBlockItemComparator; constexpr static bool PassType = false; @@ -2565,6 +2584,10 @@ struct TComparatorTraits { ythrow yexception() << "Comparator not implemented for block resources: "; } + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } + template<typename TTzDate> static std::unique_ptr<TResult> MakeTzDate(bool isOptional) { if (isOptional) { @@ -2586,6 +2609,7 @@ struct THasherTraits { using TExtOptional = NUdf::TExternalOptionalBlockItemHasher; template <typename T, bool Nullable> using TTzDateHasher = NYql::NUdf::TTzDateBlockItemHasher<T, Nullable>; + using TSingularType = NUdf::TSingularTypeBlockItemHaser; constexpr static bool PassType = false; @@ -2607,6 +2631,10 @@ struct THasherTraits { return std::make_unique<TTzDateHasher<TTzDate, false>>(); } } + + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } }; NUdf::IBlockItemComparator::TPtr TBlockTypeHelper::MakeComparator(NUdf::TType* type) const { diff --git a/yql/essentials/public/udf/arrow/block_builder.h b/yql/essentials/public/udf/arrow/block_builder.h index 92f4f7e123d..baac1842b90 100644 --- a/yql/essentials/public/udf/arrow/block_builder.h +++ b/yql/essentials/public/udf/arrow/block_builder.h @@ -10,6 +10,7 @@ #include <yql/essentials/public/udf/udf_value_builder.h> #include <yql/essentials/public/udf/udf_type_inspection.h> +#include <arrow/array/array_base.h> #include <arrow/datum.h> #include <arrow/c/bridge.h> @@ -1358,6 +1359,53 @@ private: std::unique_ptr<TTypedBufferBuilder<ui8>> NullBuilder; }; +class TSingularBlockBuilder final: public TArrayBuilderBase { +public: + TSingularBlockBuilder(const TType* type, const ITypeInfoHelper& typeInfoHelper, arrow::MemoryPool& pool, + size_t maxLen, const TParams& params = {}) + : TArrayBuilderBase(typeInfoHelper, type, pool, maxLen, params) { + Reserve(); + } + + void DoAdd(NUdf::TUnboxedValuePod value) final { + Y_UNUSED(value); + } + + void DoAdd(TBlockItem value) final { + Y_UNUSED(value); + } + + void DoAdd(TInputBuffer& input) final { + Y_UNUSED(input.PopChar()); + } + + void DoAddDefault() final {} + + void DoAddMany(const arrow::ArrayData& array, const ui8* sparseBitmap, size_t popCount) final { + Y_UNUSED(array, sparseBitmap, popCount); + } + + void DoAddMany(const arrow::ArrayData& array, ui64 beginIndex, size_t count) final { + Y_UNUSED(array, beginIndex, count); + } + + void DoAddMany(const arrow::ArrayData& array, const ui64* indexes, size_t count) final { + Y_UNUSED(array, indexes, count); + } + + TBlockArrayTree::Ptr DoBuildTree(bool finish) final { + TBlockArrayTree::Ptr result = std::make_shared<TBlockArrayTree>(); + Y_UNUSED(finish); + result->Payload.push_back(arrow::NullArray(GetCurrLen()).data()); + return result; + } + +private: + size_t DoReserve() final { + return 0; + } +}; + using TArrayBuilderParams = TArrayBuilderBase::TParams; struct TBuilderTraits { @@ -1373,6 +1421,7 @@ struct TBuilderTraits { using TResource = TResourceArrayBuilder<Nullable>; template<typename TTzDate, bool Nullable> using TTzDateReader = TTzDateArrayBuilder<TTzDate, Nullable>; + using TSingular = TSingularBlockBuilder; constexpr static bool PassType = true; @@ -1412,6 +1461,10 @@ struct TBuilderTraits { return std::make_unique<TTzDateReader<TTzDate, false>>(type, typeInfoHelper, pool, maxLen, params); } } + + static std::unique_ptr<TResult> MakeSingular(const TType* type, const ITypeInfoHelper& typeInfoHelper, arrow::MemoryPool& pool, size_t maxLen, const TArrayBuilderParams& params) { + return std::make_unique<TSingular>(type, typeInfoHelper, pool, maxLen, params); + } }; inline std::unique_ptr<IArrayBuilder> MakeArrayBuilder( diff --git a/yql/essentials/public/udf/arrow/block_item.h b/yql/essentials/public/udf/arrow/block_item.h index 2f9784cd3c4..79686b3094f 100644 --- a/yql/essentials/public/udf/arrow/block_item.h +++ b/yql/essentials/public/udf/arrow/block_item.h @@ -166,6 +166,18 @@ public: return &Raw; } + static inline TBlockItem Void() { + TBlockItem v; + v.Raw.Simple.Meta = static_cast<ui8>(EMarkers::Embedded); + return v; + } + + static inline TBlockItem Zero() { + TBlockItem v; + v.Raw.Simple.Meta = static_cast<ui8>(EMarkers::Embedded); + return v; + } + inline const void* GetRawPtr() const { return &Raw; diff --git a/yql/essentials/public/udf/arrow/block_item_comparator.h b/yql/essentials/public/udf/arrow/block_item_comparator.h index e185b63f664..ad803799c63 100644 --- a/yql/essentials/public/udf/arrow/block_item_comparator.h +++ b/yql/essentials/public/udf/arrow/block_item_comparator.h @@ -169,6 +169,24 @@ public: } }; +class TSingularTypeBlockItemComparator: public TBlockItemComparatorBase<TSingularTypeBlockItemComparator, /*Nullable=*/false> { +public: + i64 DoCompare(TBlockItem lhs, TBlockItem rhs) const { + Y_UNUSED(lhs, rhs); + return 0; + } + + bool DoEquals(TBlockItem lhs, TBlockItem rhs) const { + Y_UNUSED(lhs, rhs); + return true; + } + + bool DoLess(TBlockItem lhs, TBlockItem rhs) const { + Y_UNUSED(lhs, rhs); + return false; + } +}; + template<typename TTzType, bool Nullable> class TTzDateBlockItemComparator : public TBlockItemComparatorBase<TTzDateBlockItemComparator<TTzType, Nullable>, Nullable> { using TLayout = typename TDataType<TTzType>::TLayout; diff --git a/yql/essentials/public/udf/arrow/block_item_hasher.h b/yql/essentials/public/udf/arrow/block_item_hasher.h index 3f77e27b6f1..9108d7b06e8 100644 --- a/yql/essentials/public/udf/arrow/block_item_hasher.h +++ b/yql/essentials/public/udf/arrow/block_item_hasher.h @@ -76,6 +76,14 @@ public: } }; +class TSingularTypeBlockItemHaser : public TBlockItemHasherBase<TSingularTypeBlockItemHaser, /*Nullable=*/false> { +public: + ui64 DoHash(TBlockItem value) const { + Y_UNUSED(value); + return 0; + } +}; + template <bool Nullable> class TTupleBlockItemHasher : public TBlockItemHasherBase<TTupleBlockItemHasher<Nullable>, Nullable> { public: diff --git a/yql/essentials/public/udf/arrow/block_reader.h b/yql/essentials/public/udf/arrow/block_reader.h index 05dd3ce4409..6652df2ac67 100644 --- a/yql/essentials/public/udf/arrow/block_reader.h +++ b/yql/essentials/public/udf/arrow/block_reader.h @@ -424,6 +424,48 @@ private: TFixedSizeBlockReader<ui16, /* Nullable */false> TimezoneReader_; }; +// NOTE: For any singular type we use arrow::null() data type. +// This data type DOES NOT support bit mask so for optional type +// we have to use |TExternalOptional| wrapper. +class TSingularTypeBlockReader: public IBlockReader { +public: + TSingularTypeBlockReader() = default; + + ~TSingularTypeBlockReader() override = default; + + TBlockItem GetItem(const arrow::ArrayData& data, size_t index) override { + Y_UNUSED(data, index); + return TBlockItem::Zero(); + } + + TBlockItem GetScalarItem(const arrow::Scalar& scalar) override { + Y_UNUSED(scalar); + return TBlockItem::Zero(); + } + + ui64 GetDataWeight(const arrow::ArrayData& data) const override { + Y_UNUSED(data); + return 0; + } + + ui64 GetDataWeight(TBlockItem item) const override { + Y_UNUSED(item); + return 0; + } + + ui64 GetDefaultValueWeight() const override { + return 0; + } + + void SaveItem(const arrow::ArrayData& data, size_t index, TOutputBuffer& out) const override { + Y_UNUSED(index, data, out); + } + + void SaveScalarItem(const arrow::Scalar& scalar, TOutputBuffer& out) const override { + Y_UNUSED(scalar, out); + } +}; + class TExternalOptionalBlockReader final : public IBlockReader { public: TExternalOptionalBlockReader(std::unique_ptr<IBlockReader>&& inner) @@ -498,6 +540,7 @@ struct TReaderTraits { using TResource = TResourceBlockReader<Nullable>; template<typename TTzDate, bool Nullable> using TTzDateReader = TTzDateBlockReader<TTzDate, Nullable>; + using TSingularType = TSingularTypeBlockReader; constexpr static bool PassType = false; @@ -518,6 +561,10 @@ struct TReaderTraits { } } + static std::unique_ptr<TResult> MakeSingular() { + return std::make_unique<TSingularType>(); + } + template<typename TTzDate> static std::unique_ptr<TResult> MakeTzDate(bool isOptional) { if (isOptional) { @@ -595,6 +642,10 @@ inline void UpdateBlockItemSerializeProps(const ITypeInfoHelper& typeInfoHelper, return; } + if (IsSingularType(typeInfoHelper, type)) { + return; + } + Y_ENSURE(false, "Unsupported type"); } diff --git a/yql/essentials/public/udf/arrow/dispatch_traits.h b/yql/essentials/public/udf/arrow/dispatch_traits.h index 88c303cc874..87c25b93f56 100644 --- a/yql/essentials/public/udf/arrow/dispatch_traits.h +++ b/yql/essentials/public/udf/arrow/dispatch_traits.h @@ -1,5 +1,6 @@ #pragma once +#include <yql/essentials/public/udf/arrow/util.h> #include <yql/essentials/public/udf/udf_type_inspection.h> #include <yql/essentials/public/udf/udf_value_builder.h> @@ -85,8 +86,7 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo TOptionalTypeInspector unpackedOpt(typeInfoHelper, unpacked); TPgTypeInspector unpackedPg(typeInfoHelper, unpacked); - if (unpackedOpt || typeOpt && unpackedPg) { - // at least 2 levels of optionals + if (unpackedOpt || (typeOpt && NeedWrapWithExternalOptional(typeInfoHelper, unpacked))) { ui32 nestLevel = 0; auto currentType = type; auto previousType = type; @@ -103,7 +103,7 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo } } - if (TPgTypeInspector(typeInfoHelper, currentType)) { + if (NeedWrapWithExternalOptional(typeInfoHelper, currentType)) { previousType = currentType; ++nestLevel; } @@ -118,8 +118,7 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo } return reader; - } - else { + } else { type = unpacked; } @@ -230,6 +229,15 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo } } + if (IsSingularType(typeInfoHelper, type)) { + Y_ENSURE(!isOptional, "Optional data types are not supported directly for singular type. Please use TExternalOptional wrapper."); + if constexpr (TTraits::PassType) { + return TTraits::MakeSingular(type, std::forward<TArgs>(args)...); + } else { + return TTraits::MakeSingular(std::forward<TArgs>(args)...); + } + } + Y_ENSURE(false, "Unsupported type"); } diff --git a/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp b/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp index bbb4c134c86..d0851c5e869 100644 --- a/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp +++ b/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp @@ -220,6 +220,46 @@ Y_UNIT_TEST_SUITE(TArrayBuilderTest) { UNIT_ASSERT_VALUES_EQUAL(item2AfterRead.GetStringRefFromValue(), "234"); } + Y_UNIT_TEST(TestSingularTypeValueBuilderReader) { + TArrayBuilderTestData data; + const auto nullType = data.PgmBuilder.NewNullType(); + + std::shared_ptr<arrow::ArrayData> arrayData = arrow::NullArray{42}.data(); + IArrayBuilder::TArrayDataItem arrayDataItem = {.Data = arrayData.get(), .StartOffset = 0}; + { + const auto arrayBuilder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), nullType, *data.ArrowPool, MAX_BLOCK_SIZE, /*pgBuilder=*/nullptr); + // Check builder. + arrayBuilder->Add(TUnboxedValuePod::Zero()); + arrayBuilder->Add(TBlockItem::Zero()); + arrayBuilder->Add(TBlockItem::Zero(), 4); + TInputBuffer inputBuffer("Just arbitrary string"); + arrayBuilder->Add(inputBuffer); + arrayBuilder->AddMany(*arrayData, /*popCount=*/3u, /*sparseBitmat=*/nullptr, /*bitmapSize=*/arrayData->length); + arrayBuilder->AddMany(&arrayDataItem, /*arrayCount=*/1, /*beginIndex=*/1, /*count=*/3u); + std::vector<ui64> indexes = {1, 5, 7, 10}; + arrayBuilder->AddMany(&arrayDataItem, /*arrayCount=*/1, /*beginIndex=*/indexes.data(), /*count=*/4u); + UNIT_ASSERT_VALUES_EQUAL(arrayBuilder->Build(true).array()->length, 1 + 1 + 4 + 1 + 3 + 3 + 4); + } + + { + // Check reader. + const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), nullType); + + UNIT_ASSERT(blockReader->GetItem(*arrayData, 0)); + UNIT_ASSERT(blockReader->GetScalarItem(arrow::Scalar(arrow::null()))); + UNIT_ASSERT_EQUAL(blockReader->GetDataWeight(*arrayData), 0); + UNIT_ASSERT_EQUAL(blockReader->GetDataWeight(TBlockItem::Zero()), 0); + UNIT_ASSERT_EQUAL(blockReader->GetDefaultValueWeight(), 0); + UNIT_ASSERT_EQUAL(blockReader->GetDefaultValueWeight(), 0); + + TOutputBuffer outputBuffer; + blockReader->SaveItem(*arrayData, 1, outputBuffer); + UNIT_ASSERT(outputBuffer.Finish().empty()); + blockReader->SaveScalarItem(arrow::Scalar(arrow::null()), outputBuffer); + UNIT_ASSERT(outputBuffer.Finish().empty()); + } + } + Y_UNIT_TEST(TestBuilderAllocatedSize) { TArrayBuilderTestData data; const auto optStringType = data.PgmBuilder.NewDataType(NUdf::EDataSlot::String, true); diff --git a/yql/essentials/public/udf/arrow/util.h b/yql/essentials/public/udf/arrow/util.h index f7bdb715f98..e899af26af7 100644 --- a/yql/essentials/public/udf/arrow/util.h +++ b/yql/essentials/public/udf/arrow/util.h @@ -12,6 +12,9 @@ #include <functional> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_types.h> + namespace NYql { namespace NUdf { @@ -236,5 +239,17 @@ inline void ZeroMemoryContext(void* ptr) { SetMemoryContext(ptr, nullptr); } +inline bool IsSingularType(const ITypeInfoHelper& typeInfoHelper, const TType* type) { + auto kind = typeInfoHelper.GetTypeKind(type); + return kind == ETypeKind::Null || + kind == ETypeKind::Void || + kind == ETypeKind::EmptyDict || + kind == ETypeKind::EmptyList; +} + +inline bool NeedWrapWithExternalOptional(const ITypeInfoHelper& typeInfoHelper, const TType* type) { + return TPgTypeInspector(typeInfoHelper, type) || IsSingularType(typeInfoHelper, type); +} + } // namespace NUdf } // namespace NYql diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json index 3c1aa86fec2..ffeebb57ccb 100644 --- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json @@ -275,6 +275,27 @@ "uri": "https://{canondata_backend}/1936842/8073eb626dd657fcbe20d34185c363a1a18c3e7c/resource.tar.gz#test.test_blocks-agg_all_mixed_distinct-default.txt-Results_/results.txt" } ], + "test.test[blocks-agg_singular_type_key_optional-default.txt-Debug]": [ + { + "checksum": "71ee94512d6ef28833fb6df3bace7b53", + "size": 2727, + "uri": "https://{canondata_backend}/1925842/7e03c084910acb6d9d50a1f7dc65eda3cdac3b45/resource.tar.gz#test.test_blocks-agg_singular_type_key_optional-default.txt-Debug_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_key_optional-default.txt-Peephole]": [ + { + "checksum": "db2e4bd6530b31b6efceb77a4a184b4e", + "size": 6606, + "uri": "https://{canondata_backend}/1925842/7e03c084910acb6d9d50a1f7dc65eda3cdac3b45/resource.tar.gz#test.test_blocks-agg_singular_type_key_optional-default.txt-Peephole_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_key_optional-default.txt-Results]": [ + { + "checksum": "4b79ad0d41612ad09d735f34513ee6ff", + "size": 7301, + "uri": "https://{canondata_backend}/1925842/7e03c084910acb6d9d50a1f7dc65eda3cdac3b45/resource.tar.gz#test.test_blocks-agg_singular_type_key_optional-default.txt-Results_/results.txt" + } + ], "test.test[blocks-and-default.txt-Debug]": [ { "checksum": "47525fa40526e04498f0c41e6bc48f59", diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json index 48c0652311c..73ff7dcd875 100644 --- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json @@ -258,6 +258,27 @@ "uri": "https://{canondata_backend}/1937150/3d01c6ab2777fc3b99338655d39a5bcbb1ac89c3/resource.tar.gz#test.test_blocks-agg_by_key_only_distinct-default.txt-Results_/results.txt" } ], + "test.test[blocks-agg_singular_type_value_optional-default.txt-Debug]": [ + { + "checksum": "06774e6dab64198fc6cc5d173b0bba26", + "size": 2781, + "uri": "https://{canondata_backend}/1781765/b8d92d6ccf46e436b2e5b3b70ab511bab6d820b0/resource.tar.gz#test.test_blocks-agg_singular_type_value_optional-default.txt-Debug_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_value_optional-default.txt-Peephole]": [ + { + "checksum": "d22ed37889eea3b41eadb6164bf6d017", + "size": 3113, + "uri": "https://{canondata_backend}/1781765/b8d92d6ccf46e436b2e5b3b70ab511bab6d820b0/resource.tar.gz#test.test_blocks-agg_singular_type_value_optional-default.txt-Peephole_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_value_optional-default.txt-Results]": [ + { + "checksum": "98fbeb83e5295954045efa6fd159626f", + "size": 5301, + "uri": "https://{canondata_backend}/1781765/b8d92d6ccf46e436b2e5b3b70ab511bab6d820b0/resource.tar.gz#test.test_blocks-agg_singular_type_value_optional-default.txt-Results_/results.txt" + } + ], "test.test[blocks-exists-default.txt-Debug]": [ { "checksum": "a871029504a6d3f1c07342493b86d28d", diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json index 399f6d226b1..d018f1582d5 100644 --- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json @@ -220,7 +220,7 @@ { "checksum": "02e80809d3cbf91101d09d4ac1e87aa0", "size": 623, - "uri": "https://{canondata_backend}/1917492/b01930df0710eb10e4ce2d35cddca6be33ac8a9f/resource.tar.gz#test.test_blocks-as_tuple-default.txt-Peephole_/opt.yql" + "uri": "https://{canondata_backend}/1130705/f9eb075ce8fc54a57832e4ee918669601325c133/resource.tar.gz#test.test_blocks-as_tuple-default.txt-Peephole_/opt.yql" } ], "test.test[blocks-as_tuple-default.txt-Results]": [ diff --git a/yql/essentials/tests/sql/minirun/part8/canondata/result.json b/yql/essentials/tests/sql/minirun/part8/canondata/result.json index 6d3cbc281dd..94d8c6547e4 100644 --- a/yql/essentials/tests/sql/minirun/part8/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part8/canondata/result.json @@ -352,6 +352,48 @@ "uri": "https://{canondata_backend}/1031349/4d0c6ce1905689c65e264d15d770d36efcd9426f/resource.tar.gz#test.test_binding-named_expr_input-default.txt-Results_/results.txt" } ], + "test.test[blocks-agg_singular_type_key-default.txt-Debug]": [ + { + "checksum": "b97d36400fafea8f8f6670954d7ac139", + "size": 2685, + "uri": "https://{canondata_backend}/1936273/07450a3416f3c728f9a8a8fdde6e5f5a0ca2d9a6/resource.tar.gz#test.test_blocks-agg_singular_type_key-default.txt-Debug_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_key-default.txt-Peephole]": [ + { + "checksum": "f77f6136a2995c5b0e0ae3ed00274d36", + "size": 6564, + "uri": "https://{canondata_backend}/1936273/07450a3416f3c728f9a8a8fdde6e5f5a0ca2d9a6/resource.tar.gz#test.test_blocks-agg_singular_type_key-default.txt-Peephole_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_key-default.txt-Results]": [ + { + "checksum": "e2233558149bd3009f7f16412bf4838a", + "size": 6117, + "uri": "https://{canondata_backend}/1936273/07450a3416f3c728f9a8a8fdde6e5f5a0ca2d9a6/resource.tar.gz#test.test_blocks-agg_singular_type_key-default.txt-Results_/results.txt" + } + ], + "test.test[blocks-agg_singular_type_value-default.txt-Debug]": [ + { + "checksum": "4733abf71c9c62e30af77c6490d59334", + "size": 2358, + "uri": "https://{canondata_backend}/1130705/a25045513209436069d9f9a29831b732c13e1675/resource.tar.gz#test.test_blocks-agg_singular_type_value-default.txt-Debug_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_value-default.txt-Peephole]": [ + { + "checksum": "b34c4e8ca42e6232ef03f2ffeb05fd83", + "size": 3013, + "uri": "https://{canondata_backend}/1130705/a25045513209436069d9f9a29831b732c13e1675/resource.tar.gz#test.test_blocks-agg_singular_type_value-default.txt-Peephole_/opt.yql" + } + ], + "test.test[blocks-agg_singular_type_value-default.txt-Results]": [ + { + "checksum": "4718805e72274809e4ac6c07ee8dfd7d", + "size": 3489, + "uri": "https://{canondata_backend}/1130705/a25045513209436069d9f9a29831b732c13e1675/resource.tar.gz#test.test_blocks-agg_singular_type_value-default.txt-Results_/results.txt" + } + ], "test.test[blocks-and_scalar-default.txt-Debug]": [ { "checksum": "e5ccc5c53756e09ded8e82b6d662e5e9", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index f345b0e389d..0c9c63e1618 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -1378,6 +1378,34 @@ "uri": "https://{canondata_backend}/1917492/7dd4bc86433f6173a26b62397e1ef41fa9471945/resource.tar.gz#test_sql2yql.test_blocks-agg_by_key_only_distinct_/sql.yql" } ], + "test_sql2yql.test[blocks-agg_singular_type_key]": [ + { + "checksum": "7cae7f556775597a0b451a875e77a1df", + "size": 7636, + "uri": "https://{canondata_backend}/1784117/5ff6ff6c0808bf39612567f492af1bc2db36da20/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_key_/sql.yql" + } + ], + "test_sql2yql.test[blocks-agg_singular_type_key_optional]": [ + { + "checksum": "a3f91d7949791561f4972eafc1610499", + "size": 7678, + "uri": "https://{canondata_backend}/1781765/9e1dc7f8aa95db55a59c09f397a0634224d08363/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_key_optional_/sql.yql" + } + ], + "test_sql2yql.test[blocks-agg_singular_type_value]": [ + { + "checksum": "cd58c3714a9d215fd1f4bea4e36f37a2", + "size": 3634, + "uri": "https://{canondata_backend}/1781765/9e1dc7f8aa95db55a59c09f397a0634224d08363/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_value_/sql.yql" + } + ], + "test_sql2yql.test[blocks-agg_singular_type_value_optional]": [ + { + "checksum": "5ae70db766241594bfea9edd5e5dec34", + "size": 3676, + "uri": "https://{canondata_backend}/1781765/0dce37dc71c65fe553d73ed7cf98a62bdee9ddee/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_value_optional_/sql.yql" + } + ], "test_sql2yql.test[blocks-and]": [ { "checksum": "e22a52b51ef20174c3b832acb09df01b", @@ -1410,7 +1438,7 @@ { "checksum": "601f02d489707b615a9ff16a4fe1d3f5", "size": 1304, - "uri": "https://{canondata_backend}/1900335/c447765ddbde200b8fe3ee8091f4d625b36b6bc6/resource.tar.gz#test_sql2yql.test_blocks-as_tuple_/sql.yql" + "uri": "https://{canondata_backend}/1784826/bb2033aff3202d2b68e04361e6d1bacbf4cbbed6/resource.tar.gz#test_sql2yql.test_blocks-as_tuple_/sql.yql" } ], "test_sql2yql.test[blocks-coalesce]": [ @@ -8299,6 +8327,26 @@ "uri": "file://test_sql_format.test_blocks-agg_by_key_only_distinct_/formatted.sql" } ], + "test_sql_format.test[blocks-agg_singular_type_key]": [ + { + "uri": "file://test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql" + } + ], + "test_sql_format.test[blocks-agg_singular_type_key_optional]": [ + { + "uri": "file://test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql" + } + ], + "test_sql_format.test[blocks-agg_singular_type_value]": [ + { + "uri": "file://test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql" + } + ], + "test_sql_format.test[blocks-agg_singular_type_value_optional]": [ + { + "uri": "file://test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql" + } + ], "test_sql_format.test[blocks-and]": [ { "uri": "file://test_sql_format.test_blocks-and_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql new file mode 100644 index 00000000000..fd6c96da8f3 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql @@ -0,0 +1,72 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; + +$data = ListMap( + ListFromRange(1, $n), ($x) -> ( + <| + idx: $x, + empty_list: [], + empty_dict: {}, + nil: NULL, + val: $x + 5, + vid: Void(), + emtpy_tuple: AsTuple(), + empty_struct: AsStruct() + |> + ) +); + +SELECT + empty_list, + SOME(idx) +FROM + as_table($data) +GROUP BY + empty_list +; + +SELECT + empty_dict, + SOME(idx) +FROM + as_table($data) +GROUP BY + empty_dict +; + +SELECT + nil, + SOME(idx) +FROM + as_table($data) +GROUP BY + nil +; + +SELECT + vid, + SOME(idx) +FROM + as_table($data) +GROUP BY + vid +; + +SELECT + emtpy_tuple, + SOME(idx) +FROM + as_table($data) +GROUP BY + emtpy_tuple +; + +SELECT + empty_struct, + SOME(idx) +FROM + as_table($data) +GROUP BY + empty_struct +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql new file mode 100644 index 00000000000..401f8117f5b --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql @@ -0,0 +1,72 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; + +$data = ListMap( + ListFromRange(1, $n), ($x) -> ( + <| + idx: $x, + empty_list: Just([]), + empty_dict: Just({}), + nil: Just(NULL), + val: $x + 5, + vid: Just(Void()), + emtpy_tuple: Just(AsTuple()), + empty_struct: Just(AsStruct()) + |> + ) +); + +SELECT + empty_list, + SOME(idx) +FROM + as_table($data) +GROUP BY + empty_list +; + +SELECT + empty_dict, + SOME(idx) +FROM + as_table($data) +GROUP BY + empty_dict +; + +SELECT + nil, + SOME(idx) +FROM + as_table($data) +GROUP BY + nil +; + +SELECT + vid, + SOME(idx) +FROM + as_table($data) +GROUP BY + vid +; + +SELECT + emtpy_tuple, + SOME(idx) +FROM + as_table($data) +GROUP BY + emtpy_tuple +; + +SELECT + empty_struct, + SOME(idx) +FROM + as_table($data) +GROUP BY + empty_struct +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql new file mode 100644 index 00000000000..f8836f06f6a --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql @@ -0,0 +1,33 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; + +$data = ListMap( + ListFromRange(1, $n), ($x) -> ( + <| + idx: $x, + empty_list: [], + empty_dict: {}, + nil: NULL, + val: $x + 5, + vid: Void(), + emtpy_tuple: AsTuple(), + empty_struct: AsStruct() + |> + ) +); + +SELECT + idx, + SOME(empty_dict), + SOME(empty_list), + SOME(nil), + SOME(empty_dict), + SOME(vid), + SOME(emtpy_tuple), + SOME(empty_struct), +FROM + as_table($data) +GROUP BY + idx +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql new file mode 100644 index 00000000000..258de29fb0e --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql @@ -0,0 +1,33 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; + +$data = ListMap( + ListFromRange(1, $n), ($x) -> ( + <| + idx: $x, + empty_list: Just([]), + empty_dict: Just({}), + nil: Just(NULL), + val: $x + 5, + vid: Just(Void()), + emtpy_tuple: Just(AsTuple()), + empty_struct: Just(AsStruct()) + |> + ) +); + +SELECT + idx, + SOME(empty_dict), + SOME(empty_list), + SOME(nil), + SOME(empty_dict), + SOME(vid), + SOME(emtpy_tuple), + SOME(empty_struct), +FROM + as_table($data) +GROUP BY + idx +; diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql new file mode 100644 index 00000000000..34c1f319624 --- /dev/null +++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql @@ -0,0 +1,23 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; +$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x, + empty_list: [], + empty_dict: {}, + nil: NULL, + val: $x + 5, + vid: Void(), + emtpy_tuple: AsTuple(), + empty_struct: AsStruct()|>)); + +SELECT empty_list, SOME(idx) FROM as_table($data) GROUP BY empty_list; + +SELECT empty_dict, SOME(idx) FROM as_table($data) GROUP BY empty_dict; + +SELECT nil, SOME(idx) FROM as_table($data) GROUP BY nil; + +SELECT vid, SOME(idx) FROM as_table($data) GROUP BY vid; + +SELECT emtpy_tuple, SOME(idx) FROM as_table($data) GROUP BY emtpy_tuple; + +SELECT empty_struct, SOME(idx) FROM as_table($data) GROUP BY empty_struct; diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql new file mode 100644 index 00000000000..0b86e48184d --- /dev/null +++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql @@ -0,0 +1,24 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; + +$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x, + empty_list: Just([]), + empty_dict: Just({}), + nil: Just(NULL), + val: $x + 5, + vid: Just(Void()), + emtpy_tuple: Just(AsTuple()), + empty_struct: Just(AsStruct())|>)); + +SELECT empty_list, SOME(idx) FROM as_table($data) GROUP BY empty_list; + +SELECT empty_dict, SOME(idx) FROM as_table($data) GROUP BY empty_dict; + +SELECT nil, SOME(idx) FROM as_table($data) GROUP BY nil; + +SELECT vid, SOME(idx) FROM as_table($data) GROUP BY vid; + +SELECT emtpy_tuple, SOME(idx) FROM as_table($data) GROUP BY emtpy_tuple; + +SELECT empty_struct, SOME(idx) FROM as_table($data) GROUP BY empty_struct; diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql new file mode 100644 index 00000000000..ac290e2d25e --- /dev/null +++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql @@ -0,0 +1,26 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; +$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x, + empty_list: [], + empty_dict: {}, + nil: NULL, + val: $x + 5, + vid: Void(), + emtpy_tuple: AsTuple(), + empty_struct: AsStruct()|>)); + +SELECT + idx, + SOME(empty_dict), + SOME(empty_list), + SOME(nil), + SOME(empty_dict), + SOME(vid), + SOME(emtpy_tuple), + SOME(empty_struct), +FROM + as_table($data) +GROUP BY + idx +; diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql new file mode 100644 index 00000000000..3214db10494 --- /dev/null +++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql @@ -0,0 +1,27 @@ +PRAGMA config.flags('PeepholeFlags', 'UseAggPhases'); + +$n = 3; +$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x, + empty_list: Just([]), + empty_dict: Just({}), + nil: Just(NULL), + val: $x + 5, + vid: Just(Void()), + emtpy_tuple: Just(AsTuple()), + empty_struct: Just(AsStruct())|>)); + +SELECT + idx, + SOME(empty_dict), + SOME(empty_list), + SOME(nil), + SOME(empty_dict), + SOME(vid), + SOME(emtpy_tuple), + SOME(empty_struct), +FROM + as_table($data) +GROUP BY + idx +; + |