summaryrefslogtreecommitdiffstats
path: root/yql/essentials
diff options
context:
space:
mode:
authoratarasov5 <[email protected]>2025-03-03 16:31:45 +0300
committeratarasov5 <[email protected]>2025-03-03 16:49:51 +0300
commit35c4c93230d28f35ca37296c064a1b8807146307 (patch)
tree42beb0c25393375f86a4e2af97a9d2db8427601b /yql/essentials
parentca1e2aef23c33eb024704bdf3568f131a2763eaa (diff)
YQL-18276: Block singular type implementation
commit_hash:1334e5c0fb5ff26fd40681059409f46cf49ec025
Diffstat (limited to 'yql/essentials')
-rw-r--r--yql/essentials/minikql/computation/mkql_block_reader.cpp18
-rw-r--r--yql/essentials/minikql/computation/mkql_block_transport.cpp54
-rw-r--r--yql/essentials/minikql/computation/mkql_block_trimmer.cpp16
-rw-r--r--yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp15
-rw-r--r--yql/essentials/minikql/mkql_type_builder.cpp36
-rw-r--r--yql/essentials/public/udf/arrow/block_builder.h53
-rw-r--r--yql/essentials/public/udf/arrow/block_item.h12
-rw-r--r--yql/essentials/public/udf/arrow/block_item_comparator.h18
-rw-r--r--yql/essentials/public/udf/arrow/block_item_hasher.h8
-rw-r--r--yql/essentials/public/udf/arrow/block_reader.h51
-rw-r--r--yql/essentials/public/udf/arrow/dispatch_traits.h18
-rw-r--r--yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp40
-rw-r--r--yql/essentials/public/udf/arrow/util.h15
-rw-r--r--yql/essentials/tests/sql/minirun/part0/canondata/result.json21
-rw-r--r--yql/essentials/tests/sql/minirun/part2/canondata/result.json21
-rw-r--r--yql/essentials/tests/sql/minirun/part7/canondata/result.json2
-rw-r--r--yql/essentials/tests/sql/minirun/part8/canondata/result.json42
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json50
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql72
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql72
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql33
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql33
-rw-r--r--yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql23
-rw-r--r--yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql24
-rw-r--r--yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql26
-rw-r--r--yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql27
26 files changed, 786 insertions, 14 deletions
diff --git a/yql/essentials/minikql/computation/mkql_block_reader.cpp b/yql/essentials/minikql/computation/mkql_block_reader.cpp
index 4e2060e7395..5886e121c40 100644
--- a/yql/essentials/minikql/computation/mkql_block_reader.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_reader.cpp
@@ -162,6 +162,19 @@ private:
i32 TypeLen = 0;
};
+class TSingularTypeItemConverter: public IBlockItemConverter { // Converter for singular types: both directions ignore their input and return a fixed Zero() value.
+public:
+ NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final { // block item -> unboxed value
+ Y_UNUSED(item, holderFactory); // neither argument is consulted
+ return NUdf::TUnboxedValuePod::Zero();
+ }
+
+ TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final { // unboxed value -> block item
+ Y_UNUSED(value); // the source value is not consulted
+ return TBlockItem::Zero();
+ }
+};
+
template <bool Nullable>
class TTupleBlockItemConverter : public IBlockItemConverter {
public:
@@ -285,6 +298,7 @@ struct TConverterTraits {
using TExtOptional = TExternalOptionalBlockItemConverter;
template<typename TTzDate, bool Nullable>
using TTzDateConverter = TTzDateBlockItemConverter<TTzDate, Nullable>;
+ using TSingularType = TSingularTypeItemConverter;
constexpr static bool PassType = false;
@@ -325,6 +339,10 @@ struct TConverterTraits {
return std::make_unique<TTzDateConverter<TTzDate, false>>();
}
}
+
+ static std::unique_ptr<TResult> MakeSingular() { // Factory hook: creates the converter aliased as TSingularType in these traits.
+ return std::make_unique<TSingularType>();
+ }
};
} // namespace
diff --git a/yql/essentials/minikql/computation/mkql_block_transport.cpp b/yql/essentials/minikql/computation/mkql_block_transport.cpp
index a03a5027e86..2a37245f9d2 100644
--- a/yql/essentials/minikql/computation/mkql_block_transport.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_transport.cpp
@@ -429,6 +429,49 @@ private:
const std::unique_ptr<TBlockDeserializerBase> Inner_;
};
+class TSingularTypeBlockSerializer final: public IBlockSerializer { // Serializer for singular-type blocks: emits no metadata and no array bytes.
+private:
+ size_t ArrayMetadataCount() const final {
+ return 0; // matches StoreMetadata below, which stores nothing
+ }
+
+ void StoreMetadata(const arrow::ArrayData& data, const IBlockSerializer::TMetadataSink& metaSink) const final { // no-op
+ Y_UNUSED(data, metaSink);
+ }
+
+ void StoreArray(const arrow::ArrayData& data, TChunkedBuffer& dst) const final { // no-op: nothing is appended to dst
+ Y_UNUSED(data, dst);
+ }
+};
+
+class TSingularTypeBlockDeserializer final: public TBlockDeserializerBase { // Deserializer for singular-type blocks: reads nothing and rebuilds an arrow::NullArray of the requested length.
+private:
+ void DoLoadMetadata(const TMetadataSource& metaSource) final { // no-op: no metadata is read
+ Y_UNUSED(metaSource);
+ }
+
+ std::shared_ptr<arrow::ArrayData> DoMakeDefaultValue(const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) const final { // returns a NullArray of blockLen elements; offset is ignored
+ Y_UNUSED(offset);
+ Y_ENSURE(nullsCount == 0); // a null count other than zero is rejected
+ Y_ENSURE(!nulls || nulls->size() == 0); // the nulls buffer must be absent or empty
+ return arrow::NullArray(blockLen).data();
+ }
+
+ std::shared_ptr<arrow::ArrayData> DoLoadArray(TChunkedBuffer& src, const std::shared_ptr<arrow::Buffer>& nulls, i64 nullsCount, ui64 blockLen, ui64 offset) final { // same result as DoMakeDefaultValue; src is left untouched
+ Y_UNUSED(offset, src);
+ Y_ENSURE(nullsCount == 0); // a null count other than zero is rejected
+ Y_ENSURE(!nulls || nulls->size() == 0); // the nulls buffer must be absent or empty
+ return arrow::NullArray(blockLen).data();
+ }
+
+ bool IsNullable() const final {
+ return false; // consistent with the nullsCount == 0 checks above
+ }
+
+ void DoResetMetadata() final { // nothing to reset: DoLoadMetadata keeps no state
+ }
+};
+
template<bool Nullable, typename TDerived>
class TTupleBlockSerializerBase : public IBlockSerializer {
size_t ArrayMetadataCount() const final {
@@ -632,7 +675,7 @@ struct TSerializerTraits {
using TExtOptional = TExtOptionalBlockSerializer;
template<typename TTzDateType, bool Nullable>
using TTzDate = TTzDateBlockSerializer<TTzDateType, Nullable>;
-
+ using TSingularType = TSingularTypeBlockSerializer;
constexpr static bool PassType = false;
static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) {
@@ -648,6 +691,10 @@ struct TSerializerTraits {
ythrow yexception() << "Serializer not implemented for block resources";
}
+ static std::unique_ptr<TResult> MakeSingular() { // Factory hook: creates the serializer aliased as TSingularType in these traits.
+ return std::make_unique<TSingularType>();
+ }
+
template<typename TTzDateType>
static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
if (isOptional) {
@@ -670,6 +717,7 @@ struct TDeserializerTraits {
using TExtOptional = TExtOptionalBlockDeserializer;
template<typename TTzDateType, bool Nullable>
using TTzDate = TTzDateBlockDeserializer<TTzDateType, Nullable>;
+ using TSingularType = TSingularTypeBlockDeserializer;
constexpr static bool PassType = false;
@@ -686,6 +734,10 @@ struct TDeserializerTraits {
ythrow yexception() << "Deserializer not implemented for block resources";
}
+ static std::unique_ptr<TResult> MakeSingular() { // Factory hook: creates the deserializer aliased as TSingularType in these traits.
+ return std::make_unique<TSingularType>();
+ }
+
template<typename TTzDateType>
static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
if (isOptional) {
diff --git a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp
index b53a3890a4b..0b53f914525 100644
--- a/yql/essentials/minikql/computation/mkql_block_trimmer.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_trimmer.cpp
@@ -98,6 +98,17 @@ public:
}
};
+class TSingularBlockTrimmer: public TBlockTrimmerBase { // Trimmer for singular-type blocks: Trim() hands back its input as is.
+public:
+ TSingularBlockTrimmer(arrow::MemoryPool* pool) // pool is only forwarded to the base class
+ : TBlockTrimmerBase(pool) {
+ }
+
+ std::shared_ptr<arrow::ArrayData> Trim(const std::shared_ptr<arrow::ArrayData>& array) override { // returns the same ArrayData pointer, no copy is made
+ return array;
+ }
+};
+
template<typename TStringType, bool Nullable>
class TStringBlockTrimmer : public TBlockTrimmerBase {
using TOffset = typename TStringType::offset_type;
@@ -217,6 +228,7 @@ struct TTrimmerTraits {
using TResource = TResourceBlockTrimmer<Nullable>;
template<typename TTzDate, bool Nullable>
using TTzDateReader = TTzDateBlockTrimmer<TTzDate, Nullable>;
+ using TSingular = TSingularBlockTrimmer;
constexpr static bool PassType = false;
@@ -237,6 +249,10 @@ struct TTrimmerTraits {
}
}
+ static TResult::TPtr MakeSingular(arrow::MemoryPool* pool) { // Factory hook: creates the trimmer aliased as TSingular, passing pool to its constructor.
+ return std::make_unique<TSingular>(pool);
+ }
+
template<typename TTzDate>
static TResult::TPtr MakeTzDate(bool isOptional, arrow::MemoryPool* pool) {
if (isOptional) {
diff --git a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
index b689e4cf8b1..cbff1c5722d 100644
--- a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
+++ b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
@@ -674,6 +674,8 @@ protected:
auto tzDateType = PgmBuilder.NewDataType(NUdf::EDataSlot::TzDate);
auto blockTzDateType = PgmBuilder.NewBlockType(tzDateType, TBlockType::EShape::Many);
+ auto nullType = PgmBuilder.NewNullType();
+ auto blockNullType = PgmBuilder.NewBlockType(nullType, TBlockType::EShape::Many);
auto rowType =
legacyStruct
@@ -683,11 +685,12 @@ protected:
{"_yql_block_length", scalarUi64Type},
{"a", scalarOptStrType},
{"b", blockOptTupleOptUi32StrType},
- {"c", blockTzDateType}
+ {"c", blockTzDateType},
+ {"nill", blockNullType},
})
: PgmBuilder.NewMultiType(
{blockUi32Type, blockOptStrType, scalarOptStrType,
- blockOptTupleOptUi32StrType, blockTzDateType, scalarUi64Type});
+ blockOptTupleOptUi32StrType, blockTzDateType, blockNullType, scalarUi64Type});
ui64 blockLen = 1000;
UNIT_ASSERT_LE(offset + len, blockLen);
@@ -696,6 +699,8 @@ protected:
auto builder2 = MakeArrayBuilder(TTypeInfoHelper(), optStrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optStrType)), nullptr);
auto builder3 = MakeArrayBuilder(TTypeInfoHelper(), optTupleOptUi32StrType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(optTupleOptUi32StrType)), nullptr);
auto builder4 = MakeArrayBuilder(TTypeInfoHelper(), tzDateType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(tzDateType)), nullptr);
+ auto builder5 = MakeArrayBuilder(TTypeInfoHelper(), nullType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(nullType)), nullptr);
+
for (ui32 i = 0; i < blockLen; ++i) {
TBlockItem b1(i);
@@ -712,6 +717,7 @@ protected:
TBlockItem tzDate {i};
tzDate.SetTimezoneId(i % 100);
builder4->Add(tzDate);
+ builder5->Add(TBlockItem::Zero());
}
std::string_view testScalarString = "foobar";
@@ -725,12 +731,14 @@ protected:
datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf)));
datums.emplace_back(builder3->Build(true));
datums.emplace_back(builder4->Build(true));
+ datums.emplace_back(builder5->Build(true));
} else {
datums.emplace_back(builder1->Build(true));
datums.emplace_back(builder2->Build(true));
datums.emplace_back(arrow::Datum(std::make_shared<arrow::BinaryScalar>(strbuf)));
datums.emplace_back(builder3->Build(true));
datums.emplace_back(builder4->Build(true));
+ datums.emplace_back(builder5->Build(true));
datums.emplace_back(arrow::Datum(std::make_shared<arrow::UInt64Scalar>(blockLen)));
}
@@ -785,6 +793,7 @@ protected:
auto reader2 = MakeBlockReader(TTypeInfoHelper(), optStrType);
auto reader3 = MakeBlockReader(TTypeInfoHelper(), optTupleOptUi32StrType);
auto reader4 = MakeBlockReader(TTypeInfoHelper(), tzDateType);
+ auto reader5 = MakeBlockReader(TTypeInfoHelper(), nullType);
for (ui32 i = offset; i < len; ++i) {
TBlockItem b1 = reader1->GetItem(*TArrowBlock::From(unpackedColumns[0]).GetDatum().array(), i - offset);
@@ -814,6 +823,8 @@ protected:
TBlockItem b4 = reader4->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 5 : 4]).GetDatum().array(), i - offset);
UNIT_ASSERT(b4.Get<ui16>() == i);
UNIT_ASSERT(b4.GetTimezoneId() == (i % 100));
+ TBlockItem b5 = reader5->GetItem(*TArrowBlock::From(unpackedColumns[legacyStruct ? 6 : 5]).GetDatum().array(), i - offset);
+ UNIT_ASSERT(b5);
}
}
}
diff --git a/yql/essentials/minikql/mkql_type_builder.cpp b/yql/essentials/minikql/mkql_type_builder.cpp
index d1df31a97d8..dfbdd3f89c0 100644
--- a/yql/essentials/minikql/mkql_type_builder.cpp
+++ b/yql/essentials/minikql/mkql_type_builder.cpp
@@ -1522,6 +1522,17 @@ bool ConvertArrowTypeImpl(NUdf::EDataSlot slot, std::shared_ptr<arrow::DataType>
}
}
+inline bool IsSingularType(const TType* type) { // True when type is Null, Void, EmptyDict or EmptyList.
+ return type->IsNull() ||
+ type->IsVoid() ||
+ type->IsEmptyDict() ||
+ type->IsEmptyList();
+}
+
+inline bool NeedWrapWithExternalOptional(const TType* type) { // True for Pg types and for singular types (see IsSingularType).
+ return type->IsPg() || IsSingularType(type);
+}
+
bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& type, const TArrowConvertFailedCallback& onFail, bool output) {
bool isOptional;
auto unpacked = UnpackOptional(itemType, isOptional);
@@ -1534,8 +1545,7 @@ bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& typ
return false;
}
- if (unpacked->IsOptional() || isOptional && unpacked->IsPg()) {
- // at least 2 levels of optionals
+ if (unpacked->IsOptional() || isOptional && NeedWrapWithExternalOptional(unpacked)) {
ui32 nestLevel = 0;
auto currentType = itemType;
auto previousType = itemType;
@@ -1545,12 +1555,11 @@ bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& typ
currentType = AS_TYPE(TOptionalType, currentType)->GetItemType();
} while (currentType->IsOptional());
- if (currentType->IsPg()) {
+ if (NeedWrapWithExternalOptional(currentType)) {
previousType = currentType;
++nestLevel;
}
- // previousType is always Optional
std::shared_ptr<arrow::DataType> innerArrowType;
if (!ConvertArrowTypeImpl(previousType, innerArrowType, onFail, output)) {
return false;
@@ -1618,6 +1627,11 @@ bool ConvertArrowTypeImpl(TType* itemType, std::shared_ptr<arrow::DataType>& typ
return true;
}
+ if (IsSingularType(unpacked)) {
+ type = arrow::null();
+ return true;
+ }
+
if (!unpacked->IsData()) {
if (onFail) {
onFail(unpacked);
@@ -2479,6 +2493,10 @@ size_t CalcMaxBlockItemSize(const TType* type) {
return sizeof(NYql::NUdf::TUnboxedValue);
}
+ if (IsSingularType(type)) {
+ return 0;
+ }
+
if (type->IsData()) {
auto slot = *AS_TYPE(TDataType, type)->GetDataSlot();
switch (slot) {
@@ -2552,6 +2570,7 @@ struct TComparatorTraits {
using TExtOptional = NUdf::TExternalOptionalBlockItemComparator;
template <typename T, bool Nullable>
using TTzDateComparator = NUdf::TTzDateBlockItemComparator<T, Nullable>;
+ using TSingularType = NUdf::TSingularTypeBlockItemComparator;
constexpr static bool PassType = false;
@@ -2565,6 +2584,10 @@ struct TComparatorTraits {
ythrow yexception() << "Comparator not implemented for block resources: ";
}
+ static std::unique_ptr<TResult> MakeSingular() { // Factory hook: creates the comparator aliased as TSingularType in these traits.
+ return std::make_unique<TSingularType>();
+ }
+
template<typename TTzDate>
static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
if (isOptional) {
@@ -2586,6 +2609,7 @@ struct THasherTraits {
using TExtOptional = NUdf::TExternalOptionalBlockItemHasher;
template <typename T, bool Nullable>
using TTzDateHasher = NYql::NUdf::TTzDateBlockItemHasher<T, Nullable>;
+ using TSingularType = NUdf::TSingularTypeBlockItemHaser;
constexpr static bool PassType = false;
@@ -2607,6 +2631,10 @@ struct THasherTraits {
return std::make_unique<TTzDateHasher<TTzDate, false>>();
}
}
+
+ static std::unique_ptr<TResult> MakeSingular() { // Factory hook: creates the hasher aliased as TSingularType in these traits.
+ return std::make_unique<TSingularType>();
+ }
};
NUdf::IBlockItemComparator::TPtr TBlockTypeHelper::MakeComparator(NUdf::TType* type) const {
diff --git a/yql/essentials/public/udf/arrow/block_builder.h b/yql/essentials/public/udf/arrow/block_builder.h
index 92f4f7e123d..baac1842b90 100644
--- a/yql/essentials/public/udf/arrow/block_builder.h
+++ b/yql/essentials/public/udf/arrow/block_builder.h
@@ -10,6 +10,7 @@
#include <yql/essentials/public/udf/udf_value_builder.h>
#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <arrow/array/array_base.h>
#include <arrow/datum.h>
#include <arrow/c/bridge.h>
@@ -1358,6 +1359,53 @@ private:
std::unique_ptr<TTypedBufferBuilder<ui8>> NullBuilder;
};
+class TSingularBlockBuilder final: public TArrayBuilderBase { // Array builder for singular types: the DoAdd* hooks store nothing and DoBuildTree emits an arrow::NullArray.
+public:
+ TSingularBlockBuilder(const TType* type, const ITypeInfoHelper& typeInfoHelper, arrow::MemoryPool& pool,
+ size_t maxLen, const TParams& params = {})
+ : TArrayBuilderBase(typeInfoHelper, type, pool, maxLen, params) {
+ Reserve(); // NOTE(review): presumably reaches DoReserve() below via the base class - confirm
+ }
+
+ void DoAdd(NUdf::TUnboxedValuePod value) final { // the value is ignored
+ Y_UNUSED(value);
+ }
+
+ void DoAdd(TBlockItem value) final { // the item is ignored
+ Y_UNUSED(value);
+ }
+
+ void DoAdd(TInputBuffer& input) final { // NOTE(review): pops one char from input, while TSingularTypeBlockReader::SaveItem in block_reader.h writes nothing - confirm the two formats agree
+ Y_UNUSED(input.PopChar());
+ }
+
+ void DoAddDefault() final {} // nothing to store for a default item
+
+ void DoAddMany(const arrow::ArrayData& array, const ui8* sparseBitmap, size_t popCount) final { // bitmap-selected bulk add: arguments ignored
+ Y_UNUSED(array, sparseBitmap, popCount);
+ }
+
+ void DoAddMany(const arrow::ArrayData& array, ui64 beginIndex, size_t count) final { // contiguous-range bulk add: arguments ignored
+ Y_UNUSED(array, beginIndex, count);
+ }
+
+ void DoAddMany(const arrow::ArrayData& array, const ui64* indexes, size_t count) final { // index-list bulk add: arguments ignored
+ Y_UNUSED(array, indexes, count);
+ }
+
+ TBlockArrayTree::Ptr DoBuildTree(bool finish) final { // result has a single payload entry: a NullArray of GetCurrLen() elements
+ TBlockArrayTree::Ptr result = std::make_shared<TBlockArrayTree>();
+ Y_UNUSED(finish); // the same array is produced whether or not this is the final build
+ result->Payload.push_back(arrow::NullArray(GetCurrLen()).data());
+ return result;
+ }
+
+private:
+ size_t DoReserve() final {
+ return 0; // no buffers are allocated by this builder
+ }
+};
+
using TArrayBuilderParams = TArrayBuilderBase::TParams;
struct TBuilderTraits {
@@ -1373,6 +1421,7 @@ struct TBuilderTraits {
using TResource = TResourceArrayBuilder<Nullable>;
template<typename TTzDate, bool Nullable>
using TTzDateReader = TTzDateArrayBuilder<TTzDate, Nullable>;
+ using TSingular = TSingularBlockBuilder;
constexpr static bool PassType = true;
@@ -1412,6 +1461,10 @@ struct TBuilderTraits {
return std::make_unique<TTzDateReader<TTzDate, false>>(type, typeInfoHelper, pool, maxLen, params);
}
}
+
+ static std::unique_ptr<TResult> MakeSingular(const TType* type, const ITypeInfoHelper& typeInfoHelper, arrow::MemoryPool& pool, size_t maxLen, const TArrayBuilderParams& params) { // Factory hook: creates the builder aliased as TSingular, forwarding all arguments to its constructor.
+ return std::make_unique<TSingular>(type, typeInfoHelper, pool, maxLen, params);
+ }
};
inline std::unique_ptr<IArrayBuilder> MakeArrayBuilder(
diff --git a/yql/essentials/public/udf/arrow/block_item.h b/yql/essentials/public/udf/arrow/block_item.h
index 2f9784cd3c4..79686b3094f 100644
--- a/yql/essentials/public/udf/arrow/block_item.h
+++ b/yql/essentials/public/udf/arrow/block_item.h
@@ -166,6 +166,18 @@ public:
return &Raw;
}
+ static inline TBlockItem Void() { // Returns a default-constructed item whose meta byte is set to EMarkers::Embedded. NOTE(review): body is identical to Zero() - confirm both are needed.
+ TBlockItem v;
+ v.Raw.Simple.Meta = static_cast<ui8>(EMarkers::Embedded);
+ return v;
+ }
+
+ static inline TBlockItem Zero() { // Returns a default-constructed item whose meta byte is set to EMarkers::Embedded.
+ TBlockItem v;
+ v.Raw.Simple.Meta = static_cast<ui8>(EMarkers::Embedded);
+ return v;
+ }
+
inline const void* GetRawPtr() const
{
return &Raw;
diff --git a/yql/essentials/public/udf/arrow/block_item_comparator.h b/yql/essentials/public/udf/arrow/block_item_comparator.h
index e185b63f664..ad803799c63 100644
--- a/yql/essentials/public/udf/arrow/block_item_comparator.h
+++ b/yql/essentials/public/udf/arrow/block_item_comparator.h
@@ -169,6 +169,24 @@ public:
}
};
+class TSingularTypeBlockItemComparator: public TBlockItemComparatorBase<TSingularTypeBlockItemComparator, /*Nullable=*/false> { // Comparator for singular types: any two items compare as equal.
+public:
+ i64 DoCompare(TBlockItem lhs, TBlockItem rhs) const { // always 0
+ Y_UNUSED(lhs, rhs);
+ return 0;
+ }
+
+ bool DoEquals(TBlockItem lhs, TBlockItem rhs) const { // always true
+ Y_UNUSED(lhs, rhs);
+ return true;
+ }
+
+ bool DoLess(TBlockItem lhs, TBlockItem rhs) const { // always false, in line with DoCompare returning 0
+ Y_UNUSED(lhs, rhs);
+ return false;
+ }
+};
+
template<typename TTzType, bool Nullable>
class TTzDateBlockItemComparator : public TBlockItemComparatorBase<TTzDateBlockItemComparator<TTzType, Nullable>, Nullable> {
using TLayout = typename TDataType<TTzType>::TLayout;
diff --git a/yql/essentials/public/udf/arrow/block_item_hasher.h b/yql/essentials/public/udf/arrow/block_item_hasher.h
index 3f77e27b6f1..9108d7b06e8 100644
--- a/yql/essentials/public/udf/arrow/block_item_hasher.h
+++ b/yql/essentials/public/udf/arrow/block_item_hasher.h
@@ -76,6 +76,14 @@ public:
}
};
+class TSingularTypeBlockItemHaser : public TBlockItemHasherBase<TSingularTypeBlockItemHaser, /*Nullable=*/false> { // Hasher for singular types: every item hashes to 0. NOTE(review): "Haser" looks like a typo for "Hasher"; THasherTraits in mkql_type_builder.cpp uses this spelling too.
+public:
+ ui64 DoHash(TBlockItem value) const { // the item is ignored
+ Y_UNUSED(value);
+ return 0;
+ }
+};
+
template <bool Nullable>
class TTupleBlockItemHasher : public TBlockItemHasherBase<TTupleBlockItemHasher<Nullable>, Nullable> {
public:
diff --git a/yql/essentials/public/udf/arrow/block_reader.h b/yql/essentials/public/udf/arrow/block_reader.h
index 05dd3ce4409..6652df2ac67 100644
--- a/yql/essentials/public/udf/arrow/block_reader.h
+++ b/yql/essentials/public/udf/arrow/block_reader.h
@@ -424,6 +424,48 @@ private:
TFixedSizeBlockReader<ui16, /* Nullable */false> TimezoneReader_;
};
+// NOTE: Every singular type is represented with the arrow::null() data type.
+// That data type has NO validity bitmap, so an optional singular type
+// has to be wrapped in |TExternalOptional|.
+class TSingularTypeBlockReader: public IBlockReader { // Reader for singular-type blocks: items are always TBlockItem::Zero(), weights are 0 and nothing is serialized.
+public:
+ TSingularTypeBlockReader() = default;
+
+ ~TSingularTypeBlockReader() override = default;
+
+ TBlockItem GetItem(const arrow::ArrayData& data, size_t index) override { // same item for every index; data is not inspected
+ Y_UNUSED(data, index);
+ return TBlockItem::Zero();
+ }
+
+ TBlockItem GetScalarItem(const arrow::Scalar& scalar) override { // scalar is not inspected
+ Y_UNUSED(scalar);
+ return TBlockItem::Zero();
+ }
+
+ ui64 GetDataWeight(const arrow::ArrayData& data) const override { // weight of a whole array: 0
+ Y_UNUSED(data);
+ return 0;
+ }
+
+ ui64 GetDataWeight(TBlockItem item) const override { // weight of a single item: 0
+ Y_UNUSED(item);
+ return 0;
+ }
+
+ ui64 GetDefaultValueWeight() const override {
+ return 0;
+ }
+
+ void SaveItem(const arrow::ArrayData& data, size_t index, TOutputBuffer& out) const override { // no-op: out is left unchanged
+ Y_UNUSED(index, data, out);
+ }
+
+ void SaveScalarItem(const arrow::Scalar& scalar, TOutputBuffer& out) const override { // no-op: out is left unchanged
+ Y_UNUSED(scalar, out);
+ }
+};
+
class TExternalOptionalBlockReader final : public IBlockReader {
public:
TExternalOptionalBlockReader(std::unique_ptr<IBlockReader>&& inner)
@@ -498,6 +540,7 @@ struct TReaderTraits {
using TResource = TResourceBlockReader<Nullable>;
template<typename TTzDate, bool Nullable>
using TTzDateReader = TTzDateBlockReader<TTzDate, Nullable>;
+ using TSingularType = TSingularTypeBlockReader;
constexpr static bool PassType = false;
@@ -518,6 +561,10 @@ struct TReaderTraits {
}
}
+ static std::unique_ptr<TResult> MakeSingular() { // Factory hook: creates the reader aliased as TSingularType in these traits.
+ return std::make_unique<TSingularType>();
+ }
+
template<typename TTzDate>
static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
if (isOptional) {
@@ -595,6 +642,10 @@ inline void UpdateBlockItemSerializeProps(const ITypeInfoHelper& typeInfoHelper,
return;
}
+ if (IsSingularType(typeInfoHelper, type)) {
+ return;
+ }
+
Y_ENSURE(false, "Unsupported type");
}
diff --git a/yql/essentials/public/udf/arrow/dispatch_traits.h b/yql/essentials/public/udf/arrow/dispatch_traits.h
index 88c303cc874..87c25b93f56 100644
--- a/yql/essentials/public/udf/arrow/dispatch_traits.h
+++ b/yql/essentials/public/udf/arrow/dispatch_traits.h
@@ -1,5 +1,6 @@
#pragma once
+#include <yql/essentials/public/udf/arrow/util.h>
#include <yql/essentials/public/udf/udf_type_inspection.h>
#include <yql/essentials/public/udf/udf_value_builder.h>
@@ -85,8 +86,7 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo
TOptionalTypeInspector unpackedOpt(typeInfoHelper, unpacked);
TPgTypeInspector unpackedPg(typeInfoHelper, unpacked);
- if (unpackedOpt || typeOpt && unpackedPg) {
- // at least 2 levels of optionals
+ if (unpackedOpt || (typeOpt && NeedWrapWithExternalOptional(typeInfoHelper, unpacked))) {
ui32 nestLevel = 0;
auto currentType = type;
auto previousType = type;
@@ -103,7 +103,7 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo
}
}
- if (TPgTypeInspector(typeInfoHelper, currentType)) {
+ if (NeedWrapWithExternalOptional(typeInfoHelper, currentType)) {
previousType = currentType;
++nestLevel;
}
@@ -118,8 +118,7 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo
}
return reader;
- }
- else {
+ } else {
type = unpacked;
}
@@ -230,6 +229,15 @@ std::unique_ptr<typename TTraits::TResult> DispatchByArrowTraits(const ITypeInfo
}
}
+ if (IsSingularType(typeInfoHelper, type)) {
+ Y_ENSURE(!isOptional, "Optional data types are not supported directly for singular type. Please use TExternalOptional wrapper.");
+ if constexpr (TTraits::PassType) {
+ return TTraits::MakeSingular(type, std::forward<TArgs>(args)...);
+ } else {
+ return TTraits::MakeSingular(std::forward<TArgs>(args)...);
+ }
+ }
+
Y_ENSURE(false, "Unsupported type");
}
diff --git a/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp b/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp
index bbb4c134c86..d0851c5e869 100644
--- a/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp
+++ b/yql/essentials/public/udf/arrow/ut/array_builder_ut.cpp
@@ -220,6 +220,46 @@ Y_UNIT_TEST_SUITE(TArrayBuilderTest) {
UNIT_ASSERT_VALUES_EQUAL(item2AfterRead.GetStringRefFromValue(), "234");
}
+ Y_UNIT_TEST(TestSingularTypeValueBuilderReader) { // Exercises the array builder and the block reader created for the Null type.
+ TArrayBuilderTestData data;
+ const auto nullType = data.PgmBuilder.NewNullType();
+
+ std::shared_ptr<arrow::ArrayData> arrayData = arrow::NullArray{42}.data(); // 42-element source array for the AddMany calls
+ IArrayBuilder::TArrayDataItem arrayDataItem = {.Data = arrayData.get(), .StartOffset = 0};
+ {
+ const auto arrayBuilder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), nullType, *data.ArrowPool, MAX_BLOCK_SIZE, /*pgBuilder=*/nullptr);
+ // Check builder.
+ arrayBuilder->Add(TUnboxedValuePod::Zero()); // +1 item
+ arrayBuilder->Add(TBlockItem::Zero()); // +1 item
+ arrayBuilder->Add(TBlockItem::Zero(), 4); // +4 items
+ TInputBuffer inputBuffer("Just arbitrary string");
+ arrayBuilder->Add(inputBuffer); // +1 item
+ arrayBuilder->AddMany(*arrayData, /*popCount=*/3u, /*sparseBitmap=*/nullptr, /*bitmapSize=*/arrayData->length); // +3 items
+ arrayBuilder->AddMany(&arrayDataItem, /*arrayCount=*/1, /*beginIndex=*/1, /*count=*/3u); // +3 items
+ std::vector<ui64> indexes = {1, 5, 7, 10};
+ arrayBuilder->AddMany(&arrayDataItem, /*arrayCount=*/1, /*indexes=*/indexes.data(), /*count=*/4u); // +4 items
+ UNIT_ASSERT_VALUES_EQUAL(arrayBuilder->Build(true).array()->length, 1 + 1 + 4 + 1 + 3 + 3 + 4); // one term per call above, 17 in total
+ }
+
+ {
+ // Check reader.
+ const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), nullType);
+
+ UNIT_ASSERT(blockReader->GetItem(*arrayData, 0)); // the returned item must test as true
+ UNIT_ASSERT(blockReader->GetScalarItem(arrow::Scalar(arrow::null())));
+ UNIT_ASSERT_EQUAL(blockReader->GetDataWeight(*arrayData), 0);
+ UNIT_ASSERT_EQUAL(blockReader->GetDataWeight(TBlockItem::Zero()), 0);
+ UNIT_ASSERT_EQUAL(blockReader->GetDefaultValueWeight(), 0);
+ UNIT_ASSERT_EQUAL(blockReader->GetDefaultValueWeight(), 0); // NOTE(review): exact duplicate of the previous assertion
+
+ TOutputBuffer outputBuffer;
+ blockReader->SaveItem(*arrayData, 1, outputBuffer);
+ UNIT_ASSERT(outputBuffer.Finish().empty()); // SaveItem must not write anything
+ blockReader->SaveScalarItem(arrow::Scalar(arrow::null()), outputBuffer);
+ UNIT_ASSERT(outputBuffer.Finish().empty()); // SaveScalarItem must not write anything
+ }
+ }
+
Y_UNIT_TEST(TestBuilderAllocatedSize) {
TArrayBuilderTestData data;
const auto optStringType = data.PgmBuilder.NewDataType(NUdf::EDataSlot::String, true);
diff --git a/yql/essentials/public/udf/arrow/util.h b/yql/essentials/public/udf/arrow/util.h
index f7bdb715f98..e899af26af7 100644
--- a/yql/essentials/public/udf/arrow/util.h
+++ b/yql/essentials/public/udf/arrow/util.h
@@ -12,6 +12,9 @@
#include <functional>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_types.h>
+
namespace NYql {
namespace NUdf {
@@ -236,5 +239,17 @@ inline void ZeroMemoryContext(void* ptr) {
SetMemoryContext(ptr, nullptr);
}
+inline bool IsSingularType(const ITypeInfoHelper& typeInfoHelper, const TType* type) { // True when the type kind reported by typeInfoHelper is Null, Void, EmptyDict or EmptyList.
+ auto kind = typeInfoHelper.GetTypeKind(type);
+ return kind == ETypeKind::Null ||
+ kind == ETypeKind::Void ||
+ kind == ETypeKind::EmptyDict ||
+ kind == ETypeKind::EmptyList;
+}
+
+inline bool NeedWrapWithExternalOptional(const ITypeInfoHelper& typeInfoHelper, const TType* type) { // True for Pg types (per TPgTypeInspector) and for singular types (see IsSingularType).
+ return TPgTypeInspector(typeInfoHelper, type) || IsSingularType(typeInfoHelper, type);
+}
+
} // namespace NUdf
} // namespace NYql
diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
index 3c1aa86fec2..ffeebb57ccb 100644
--- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
@@ -275,6 +275,27 @@
"uri": "https://{canondata_backend}/1936842/8073eb626dd657fcbe20d34185c363a1a18c3e7c/resource.tar.gz#test.test_blocks-agg_all_mixed_distinct-default.txt-Results_/results.txt"
}
],
+ "test.test[blocks-agg_singular_type_key_optional-default.txt-Debug]": [
+ {
+ "checksum": "71ee94512d6ef28833fb6df3bace7b53",
+ "size": 2727,
+ "uri": "https://{canondata_backend}/1925842/7e03c084910acb6d9d50a1f7dc65eda3cdac3b45/resource.tar.gz#test.test_blocks-agg_singular_type_key_optional-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_key_optional-default.txt-Peephole]": [
+ {
+ "checksum": "db2e4bd6530b31b6efceb77a4a184b4e",
+ "size": 6606,
+ "uri": "https://{canondata_backend}/1925842/7e03c084910acb6d9d50a1f7dc65eda3cdac3b45/resource.tar.gz#test.test_blocks-agg_singular_type_key_optional-default.txt-Peephole_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_key_optional-default.txt-Results]": [
+ {
+ "checksum": "4b79ad0d41612ad09d735f34513ee6ff",
+ "size": 7301,
+ "uri": "https://{canondata_backend}/1925842/7e03c084910acb6d9d50a1f7dc65eda3cdac3b45/resource.tar.gz#test.test_blocks-agg_singular_type_key_optional-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[blocks-and-default.txt-Debug]": [
{
"checksum": "47525fa40526e04498f0c41e6bc48f59",
diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
index 48c0652311c..73ff7dcd875 100644
--- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
@@ -258,6 +258,27 @@
"uri": "https://{canondata_backend}/1937150/3d01c6ab2777fc3b99338655d39a5bcbb1ac89c3/resource.tar.gz#test.test_blocks-agg_by_key_only_distinct-default.txt-Results_/results.txt"
}
],
+ "test.test[blocks-agg_singular_type_value_optional-default.txt-Debug]": [
+ {
+ "checksum": "06774e6dab64198fc6cc5d173b0bba26",
+ "size": 2781,
+ "uri": "https://{canondata_backend}/1781765/b8d92d6ccf46e436b2e5b3b70ab511bab6d820b0/resource.tar.gz#test.test_blocks-agg_singular_type_value_optional-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_value_optional-default.txt-Peephole]": [
+ {
+ "checksum": "d22ed37889eea3b41eadb6164bf6d017",
+ "size": 3113,
+ "uri": "https://{canondata_backend}/1781765/b8d92d6ccf46e436b2e5b3b70ab511bab6d820b0/resource.tar.gz#test.test_blocks-agg_singular_type_value_optional-default.txt-Peephole_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_value_optional-default.txt-Results]": [
+ {
+ "checksum": "98fbeb83e5295954045efa6fd159626f",
+ "size": 5301,
+ "uri": "https://{canondata_backend}/1781765/b8d92d6ccf46e436b2e5b3b70ab511bab6d820b0/resource.tar.gz#test.test_blocks-agg_singular_type_value_optional-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[blocks-exists-default.txt-Debug]": [
{
"checksum": "a871029504a6d3f1c07342493b86d28d",
diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
index 399f6d226b1..d018f1582d5 100644
--- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
@@ -220,7 +220,7 @@
{
"checksum": "02e80809d3cbf91101d09d4ac1e87aa0",
"size": 623,
- "uri": "https://{canondata_backend}/1917492/b01930df0710eb10e4ce2d35cddca6be33ac8a9f/resource.tar.gz#test.test_blocks-as_tuple-default.txt-Peephole_/opt.yql"
+ "uri": "https://{canondata_backend}/1130705/f9eb075ce8fc54a57832e4ee918669601325c133/resource.tar.gz#test.test_blocks-as_tuple-default.txt-Peephole_/opt.yql"
}
],
"test.test[blocks-as_tuple-default.txt-Results]": [
diff --git a/yql/essentials/tests/sql/minirun/part8/canondata/result.json b/yql/essentials/tests/sql/minirun/part8/canondata/result.json
index 6d3cbc281dd..94d8c6547e4 100644
--- a/yql/essentials/tests/sql/minirun/part8/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part8/canondata/result.json
@@ -352,6 +352,48 @@
"uri": "https://{canondata_backend}/1031349/4d0c6ce1905689c65e264d15d770d36efcd9426f/resource.tar.gz#test.test_binding-named_expr_input-default.txt-Results_/results.txt"
}
],
+ "test.test[blocks-agg_singular_type_key-default.txt-Debug]": [
+ {
+ "checksum": "b97d36400fafea8f8f6670954d7ac139",
+ "size": 2685,
+ "uri": "https://{canondata_backend}/1936273/07450a3416f3c728f9a8a8fdde6e5f5a0ca2d9a6/resource.tar.gz#test.test_blocks-agg_singular_type_key-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_key-default.txt-Peephole]": [
+ {
+ "checksum": "f77f6136a2995c5b0e0ae3ed00274d36",
+ "size": 6564,
+ "uri": "https://{canondata_backend}/1936273/07450a3416f3c728f9a8a8fdde6e5f5a0ca2d9a6/resource.tar.gz#test.test_blocks-agg_singular_type_key-default.txt-Peephole_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_key-default.txt-Results]": [
+ {
+ "checksum": "e2233558149bd3009f7f16412bf4838a",
+ "size": 6117,
+ "uri": "https://{canondata_backend}/1936273/07450a3416f3c728f9a8a8fdde6e5f5a0ca2d9a6/resource.tar.gz#test.test_blocks-agg_singular_type_key-default.txt-Results_/results.txt"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_value-default.txt-Debug]": [
+ {
+ "checksum": "4733abf71c9c62e30af77c6490d59334",
+ "size": 2358,
+ "uri": "https://{canondata_backend}/1130705/a25045513209436069d9f9a29831b732c13e1675/resource.tar.gz#test.test_blocks-agg_singular_type_value-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_value-default.txt-Peephole]": [
+ {
+ "checksum": "b34c4e8ca42e6232ef03f2ffeb05fd83",
+ "size": 3013,
+ "uri": "https://{canondata_backend}/1130705/a25045513209436069d9f9a29831b732c13e1675/resource.tar.gz#test.test_blocks-agg_singular_type_value-default.txt-Peephole_/opt.yql"
+ }
+ ],
+ "test.test[blocks-agg_singular_type_value-default.txt-Results]": [
+ {
+ "checksum": "4718805e72274809e4ac6c07ee8dfd7d",
+ "size": 3489,
+ "uri": "https://{canondata_backend}/1130705/a25045513209436069d9f9a29831b732c13e1675/resource.tar.gz#test.test_blocks-agg_singular_type_value-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[blocks-and_scalar-default.txt-Debug]": [
{
"checksum": "e5ccc5c53756e09ded8e82b6d662e5e9",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index f345b0e389d..0c9c63e1618 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -1378,6 +1378,34 @@
"uri": "https://{canondata_backend}/1917492/7dd4bc86433f6173a26b62397e1ef41fa9471945/resource.tar.gz#test_sql2yql.test_blocks-agg_by_key_only_distinct_/sql.yql"
}
],
+ "test_sql2yql.test[blocks-agg_singular_type_key]": [
+ {
+ "checksum": "7cae7f556775597a0b451a875e77a1df",
+ "size": 7636,
+ "uri": "https://{canondata_backend}/1784117/5ff6ff6c0808bf39612567f492af1bc2db36da20/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_key_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[blocks-agg_singular_type_key_optional]": [
+ {
+ "checksum": "a3f91d7949791561f4972eafc1610499",
+ "size": 7678,
+ "uri": "https://{canondata_backend}/1781765/9e1dc7f8aa95db55a59c09f397a0634224d08363/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_key_optional_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[blocks-agg_singular_type_value]": [
+ {
+ "checksum": "cd58c3714a9d215fd1f4bea4e36f37a2",
+ "size": 3634,
+ "uri": "https://{canondata_backend}/1781765/9e1dc7f8aa95db55a59c09f397a0634224d08363/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_value_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[blocks-agg_singular_type_value_optional]": [
+ {
+ "checksum": "5ae70db766241594bfea9edd5e5dec34",
+ "size": 3676,
+ "uri": "https://{canondata_backend}/1781765/0dce37dc71c65fe553d73ed7cf98a62bdee9ddee/resource.tar.gz#test_sql2yql.test_blocks-agg_singular_type_value_optional_/sql.yql"
+ }
+ ],
"test_sql2yql.test[blocks-and]": [
{
"checksum": "e22a52b51ef20174c3b832acb09df01b",
@@ -1410,7 +1438,7 @@
{
"checksum": "601f02d489707b615a9ff16a4fe1d3f5",
"size": 1304,
- "uri": "https://{canondata_backend}/1900335/c447765ddbde200b8fe3ee8091f4d625b36b6bc6/resource.tar.gz#test_sql2yql.test_blocks-as_tuple_/sql.yql"
+ "uri": "https://{canondata_backend}/1784826/bb2033aff3202d2b68e04361e6d1bacbf4cbbed6/resource.tar.gz#test_sql2yql.test_blocks-as_tuple_/sql.yql"
}
],
"test_sql2yql.test[blocks-coalesce]": [
@@ -8299,6 +8327,26 @@
"uri": "file://test_sql_format.test_blocks-agg_by_key_only_distinct_/formatted.sql"
}
],
+ "test_sql_format.test[blocks-agg_singular_type_key]": [
+ {
+ "uri": "file://test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[blocks-agg_singular_type_key_optional]": [
+ {
+ "uri": "file://test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[blocks-agg_singular_type_value]": [
+ {
+ "uri": "file://test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[blocks-agg_singular_type_value_optional]": [
+ {
+ "uri": "file://test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql"
+ }
+ ],
"test_sql_format.test[blocks-and]": [
{
"uri": "file://test_sql_format.test_blocks-and_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql
new file mode 100644
index 00000000000..fd6c96da8f3
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_/formatted.sql
@@ -0,0 +1,72 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+
+$data = ListMap(
+ ListFromRange(1, $n), ($x) -> (
+ <|
+ idx: $x,
+ empty_list: [],
+ empty_dict: {},
+ nil: NULL,
+ val: $x + 5,
+ vid: Void(),
+ emtpy_tuple: AsTuple(),
+ empty_struct: AsStruct()
+ |>
+ )
+);
+
+SELECT
+ empty_list,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ empty_list
+;
+
+SELECT
+ empty_dict,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ empty_dict
+;
+
+SELECT
+ nil,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ nil
+;
+
+SELECT
+ vid,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ vid
+;
+
+SELECT
+ emtpy_tuple,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ emtpy_tuple
+;
+
+SELECT
+ empty_struct,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ empty_struct
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql
new file mode 100644
index 00000000000..401f8117f5b
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_key_optional_/formatted.sql
@@ -0,0 +1,72 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+
+$data = ListMap(
+ ListFromRange(1, $n), ($x) -> (
+ <|
+ idx: $x,
+ empty_list: Just([]),
+ empty_dict: Just({}),
+ nil: Just(NULL),
+ val: $x + 5,
+ vid: Just(Void()),
+ emtpy_tuple: Just(AsTuple()),
+ empty_struct: Just(AsStruct())
+ |>
+ )
+);
+
+SELECT
+ empty_list,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ empty_list
+;
+
+SELECT
+ empty_dict,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ empty_dict
+;
+
+SELECT
+ nil,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ nil
+;
+
+SELECT
+ vid,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ vid
+;
+
+SELECT
+ emtpy_tuple,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ emtpy_tuple
+;
+
+SELECT
+ empty_struct,
+ SOME(idx)
+FROM
+ as_table($data)
+GROUP BY
+ empty_struct
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql
new file mode 100644
index 00000000000..f8836f06f6a
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_/formatted.sql
@@ -0,0 +1,33 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+
+$data = ListMap(
+ ListFromRange(1, $n), ($x) -> (
+ <|
+ idx: $x,
+ empty_list: [],
+ empty_dict: {},
+ nil: NULL,
+ val: $x + 5,
+ vid: Void(),
+ emtpy_tuple: AsTuple(),
+ empty_struct: AsStruct()
+ |>
+ )
+);
+
+SELECT
+ idx,
+ SOME(empty_dict),
+ SOME(empty_list),
+ SOME(nil),
+ SOME(empty_dict),
+ SOME(vid),
+ SOME(emtpy_tuple),
+ SOME(empty_struct),
+FROM
+ as_table($data)
+GROUP BY
+ idx
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql
new file mode 100644
index 00000000000..258de29fb0e
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_blocks-agg_singular_type_value_optional_/formatted.sql
@@ -0,0 +1,33 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+
+$data = ListMap(
+ ListFromRange(1, $n), ($x) -> (
+ <|
+ idx: $x,
+ empty_list: Just([]),
+ empty_dict: Just({}),
+ nil: Just(NULL),
+ val: $x + 5,
+ vid: Just(Void()),
+ emtpy_tuple: Just(AsTuple()),
+ empty_struct: Just(AsStruct())
+ |>
+ )
+);
+
+SELECT
+ idx,
+ SOME(empty_dict),
+ SOME(empty_list),
+ SOME(nil),
+ SOME(empty_dict),
+ SOME(vid),
+ SOME(emtpy_tuple),
+ SOME(empty_struct),
+FROM
+ as_table($data)
+GROUP BY
+ idx
+;
diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql
new file mode 100644
index 00000000000..34c1f319624
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key.sql
@@ -0,0 +1,23 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x,
+ empty_list: [],
+ empty_dict: {},
+ nil: NULL,
+ val: $x + 5,
+ vid: Void(),
+ emtpy_tuple: AsTuple(),
+ empty_struct: AsStruct()|>));
+
+SELECT empty_list, SOME(idx) FROM as_table($data) GROUP BY empty_list;
+
+SELECT empty_dict, SOME(idx) FROM as_table($data) GROUP BY empty_dict;
+
+SELECT nil, SOME(idx) FROM as_table($data) GROUP BY nil;
+
+SELECT vid, SOME(idx) FROM as_table($data) GROUP BY vid;
+
+SELECT emtpy_tuple, SOME(idx) FROM as_table($data) GROUP BY emtpy_tuple;
+
+SELECT empty_struct, SOME(idx) FROM as_table($data) GROUP BY empty_struct;
diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql
new file mode 100644
index 00000000000..0b86e48184d
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_key_optional.sql
@@ -0,0 +1,24 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+
+$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x,
+ empty_list: Just([]),
+ empty_dict: Just({}),
+ nil: Just(NULL),
+ val: $x + 5,
+ vid: Just(Void()),
+ emtpy_tuple: Just(AsTuple()),
+ empty_struct: Just(AsStruct())|>));
+
+SELECT empty_list, SOME(idx) FROM as_table($data) GROUP BY empty_list;
+
+SELECT empty_dict, SOME(idx) FROM as_table($data) GROUP BY empty_dict;
+
+SELECT nil, SOME(idx) FROM as_table($data) GROUP BY nil;
+
+SELECT vid, SOME(idx) FROM as_table($data) GROUP BY vid;
+
+SELECT emtpy_tuple, SOME(idx) FROM as_table($data) GROUP BY emtpy_tuple;
+
+SELECT empty_struct, SOME(idx) FROM as_table($data) GROUP BY empty_struct;
diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql
new file mode 100644
index 00000000000..ac290e2d25e
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value.sql
@@ -0,0 +1,26 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x,
+ empty_list: [],
+ empty_dict: {},
+ nil: NULL,
+ val: $x + 5,
+ vid: Void(),
+ emtpy_tuple: AsTuple(),
+ empty_struct: AsStruct()|>));
+
+SELECT
+ idx,
+ SOME(empty_dict),
+ SOME(empty_list),
+ SOME(nil),
+ SOME(empty_dict),
+ SOME(vid),
+ SOME(emtpy_tuple),
+ SOME(empty_struct),
+FROM
+ as_table($data)
+GROUP BY
+ idx
+;
diff --git a/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql
new file mode 100644
index 00000000000..3214db10494
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/blocks/agg_singular_type_value_optional.sql
@@ -0,0 +1,27 @@
+PRAGMA config.flags('PeepholeFlags', 'UseAggPhases');
+
+$n = 3;
+$data = ListMap(ListFromRange(1, $n), ($x) -> (<|idx: $x,
+ empty_list: Just([]),
+ empty_dict: Just({}),
+ nil: Just(NULL),
+ val: $x + 5,
+ vid: Just(Void()),
+ emtpy_tuple: Just(AsTuple()),
+ empty_struct: Just(AsStruct())|>));
+
+SELECT
+ idx,
+ SOME(empty_dict),
+ SOME(empty_list),
+ SOME(nil),
+ SOME(empty_dict),
+ SOME(vid),
+ SOME(emtpy_tuple),
+ SOME(empty_struct),
+FROM
+ as_table($data)
+GROUP BY
+ idx
+;
+