summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
diff options
context:
space:
mode:
author: grigoriypisar <[email protected]> 2025-03-19 16:51:27 +0300
committer: grigoriypisar <[email protected]> 2025-03-19 17:17:00 +0300
commit: 8abfb3cd4a6f7251be9cf73563349a97dc8f8ea0 (patch)
tree: b4e3e6ccd8526a5822b0e1cb76d014657af26e25 /yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
parent: 5ccc45c97200ac3e0cd3464f37fb931163ac0214 (diff)
YQL mkql blocks transport, added trimming by MinFillPercentage
Added block trimming in transport commit_hash:9794613300322045a81a9b40d4ebe519d30937a4
Diffstat (limited to 'yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp')
-rw-r--r-- yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp 42
1 files changed, 31 insertions, 11 deletions
diff --git a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
index 5bb0b980054..cc175f3b7d8 100644
--- a/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
+++ b/yql/essentials/minikql/computation/mkql_computation_node_pack_ut.cpp
@@ -662,9 +662,14 @@ protected:
ui64 Len = 0;
bool LegacyStruct = false;
bool TrimBlock = false;
+ TMaybe<ui8> MinFillPercentage;
TString ToString() const {
- return TStringBuilder() << "Offset: " << Offset << ", Len: " << Len << ", LegacyStruct: " << LegacyStruct << ", TrimBlock: " << TrimBlock;
+ auto result = TStringBuilder() << "Offset: " << Offset << ", Len: " << Len << ", LegacyStruct: " << LegacyStruct << ", TrimBlock: " << TrimBlock;
+ if (MinFillPercentage) {
+ result << ", MinFillPercentage: " << ui64(*MinFillPercentage);
+ }
+ return result;
}
};
@@ -749,7 +754,6 @@ protected:
auto builder4 = MakeArrayBuilder(TTypeInfoHelper(), tzDateType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(tzDateType)), nullptr);
auto builder5 = MakeArrayBuilder(TTypeInfoHelper(), nullType, *ArrowPool_, CalcBlockLen(CalcMaxBlockItemSize(nullType)), nullptr);
-
for (ui32 i = 0; i < blockLen; ++i) {
TBlockItem b1(i);
builder1->Add(b1);
@@ -790,24 +794,28 @@ protected:
datums.emplace_back(arrow::Datum(std::make_shared<arrow::UInt64Scalar>(blockLen)));
}
+ const ui32 blockLenIndex = legacyStruct ? 2 : 6;
if (offset != 0 || len != blockLen) {
for (auto& datum : datums) {
if (datum.is_array()) {
datum = NYql::NUdf::DeepSlice(datum.array(), offset, len);
}
}
+ datums[blockLenIndex] = arrow::Datum(std::make_shared<arrow::UInt64Scalar>(len));
}
+
+ const auto trimmerFactory = [&](ui32 index) {
+ const TType* columnType = legacyStruct ? static_cast<const TStructType*>(rowType)->GetMemberType(index)
+ : static_cast<const TMultiType*>(rowType)->GetElementType(index);
+ return MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), static_cast<const TBlockType*>(columnType)->GetItemType(), ArrowPool_);
+ };
if (args.TrimBlock) {
for (ui32 index = 0; index < datums.size(); ++index) {
auto& datum = datums[index];
if (!datum.is_array()) {
continue;
}
-
- const TType* columnType = legacyStruct ? static_cast<const TStructType*>(rowType)->GetMemberType(index)
- : static_cast<const TMultiType*>(rowType)->GetElementType(index);
- const auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), static_cast<const TBlockType*>(columnType)->GetItemType(), ArrowPool_);
- datum = trimmer->Trim(datum.array());
+ datum = trimmerFactory(index)->Trim(datum.array());
}
}
TUnboxedValueVector columns;
@@ -815,7 +823,7 @@ protected:
columns.emplace_back(HolderFactory.CreateArrowBlock(std::move(datum)));
}
- TValuePackerType packer(false, rowType, ArrowPool_);
+ TValuePackerType packer(false, rowType, ArrowPool_, args.MinFillPercentage);
if (legacyStruct) {
TUnboxedValueVector columnsCopy = columns;
NUdf::TUnboxedValue row = HolderFactory.VectorAsArray(columnsCopy);
@@ -842,13 +850,24 @@ protected:
UNIT_ASSERT_VALUES_EQUAL(unpackedColumns.size(), columns.size());
if (legacyStruct) {
- UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(unpackedColumns[2]).GetDatum().scalar_as<arrow::UInt64Scalar>().value, blockLen);
+ UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(unpackedColumns[2]).GetDatum().scalar_as<arrow::UInt64Scalar>().value, len);
UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(unpackedColumns[3]).GetDatum().scalar_as<arrow::BinaryScalar>().value->ToString(), testScalarString);
} else {
- UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(unpackedColumns.back()).GetDatum().scalar_as<arrow::UInt64Scalar>().value, blockLen);
+ UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(unpackedColumns.back()).GetDatum().scalar_as<arrow::UInt64Scalar>().value, len);
UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(unpackedColumns[2]).GetDatum().scalar_as<arrow::BinaryScalar>().value->ToString(), testScalarString);
}
+ if (args.MinFillPercentage) {
+ for (size_t i = 0; i < unpackedColumns.size(); ++i) {
+ auto datum = TArrowBlock::From(unpackedColumns[i]).GetDatum();
+ if (datum.is_scalar()) {
+ continue;
+ }
+ const auto unpackedSize = NUdf::GetSizeOfArrayDataInBytes(*datum.array());
+ const auto trimmedSize = NUdf::GetSizeOfArrayDataInBytes(*trimmerFactory(i)->Trim(datum.array()));
+ UNIT_ASSERT_GE_C(trimmedSize, unpackedSize * *args.MinFillPercentage / 100, "column: " << i);
+ }
+ }
auto reader1 = MakeBlockReader(TTypeInfoHelper(), ui32Type);
auto reader2 = MakeBlockReader(TTypeInfoHelper(), optStrType);
@@ -914,7 +933,8 @@ protected:
{.Offset = 19, .Len = 623}
}),
MakeIntrusive<TArgsDispatcher<bool>>(args.LegacyStruct, std::vector<bool>{false, true}),
- MakeIntrusive<TArgsDispatcher<bool>>(args.TrimBlock, std::vector<bool>{false, true})
+ MakeIntrusive<TArgsDispatcher<bool>>(args.TrimBlock, std::vector<bool>{false, true}),
+ MakeIntrusive<TArgsDispatcher<TMaybe<ui8>>>(args.MinFillPercentage, std::vector<TMaybe<ui8>>{Nothing(), 90})
});
}