summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornadya73 <[email protected]>2023-10-03 22:30:48 +0300
committernadya73 <[email protected]>2023-10-03 22:53:44 +0300
commita33fdb9a34581fd124e92535153b1f1fdeca6aaf (patch)
treea3a0257d9efb0b89379c7ed344abb71b10e180a9
parent4275724293f2729fd92a2d2ea602e82c0237d283 (diff)
Add test for integer column serialization and fix row_index columns
-rw-r--r--yt/yt/client/arrow/arrow_row_stream_encoder.cpp9
-rw-r--r--yt/yt/library/formats/arrow_writer.cpp13
2 files changed, 12 insertions, 10 deletions
diff --git a/yt/yt/client/arrow/arrow_row_stream_encoder.cpp b/yt/yt/client/arrow/arrow_row_stream_encoder.cpp
index a0e8f7f5f55..b5988c33687 100644
--- a/yt/yt/client/arrow/arrow_row_stream_encoder.cpp
+++ b/yt/yt/client/arrow/arrow_row_stream_encoder.cpp
@@ -396,21 +396,22 @@ void SerializeIntegerColumn(
? column->GetTypedValues<ui64>()
: TRange<ui64>();
+ auto startIndex = column->StartIndex;
+
switch (simpleType) {
#define XX(cppType, ytType) \
case ESimpleLogicalValueType::ytType: { \
auto dstValues = GetTypedValues<cppType>(dstRef); \
auto* currentOutput = dstValues.Begin(); \
DecodeIntegerVector( \
- column->StartIndex, \
- column->StartIndex + column->ValueCount, \
+ startIndex, \
+ startIndex + column->ValueCount, \
valueColumn->Values->BaseValue, \
valueColumn->Values->ZigZagEncoded, \
TRange<ui32>(), \
rleIndexes, \
[&] (auto index) { \
- YT_VERIFY(index >= column->StartIndex); \
- return values[index - column->StartIndex]; \
+ return values[index]; \
}, \
[&] (auto value) { \
*currentOutput++ = value; \
diff --git a/yt/yt/library/formats/arrow_writer.cpp b/yt/yt/library/formats/arrow_writer.cpp
index 56bdec02358..cd8e720767b 100644
--- a/yt/yt/library/formats/arrow_writer.cpp
+++ b/yt/yt/library/formats/arrow_writer.cpp
@@ -399,21 +399,22 @@ void SerializeIntegerColumn(
? column->GetTypedValues<ui64>()
: TRange<ui64>();
+ auto startIndex = column->StartIndex;
+
switch (simpleType) {
#define XX(cppType, ytType) \
case ESimpleLogicalValueType::ytType: { \
auto dstValues = GetTypedValues<cppType>(dstRef); \
auto* currentOutput = dstValues.Begin(); \
DecodeIntegerVector( \
- column->StartIndex, \
- column->StartIndex + column->ValueCount, \
+ startIndex, \
+ startIndex + column->ValueCount, \
valueColumn->Values->BaseValue, \
valueColumn->Values->ZigZagEncoded, \
TRange<ui32>(), \
rleIndexes, \
[&] (auto index) { \
- YT_VERIFY(index >= column->StartIndex); \
- return values[index - column->StartIndex]; \
+ return values[index]; \
}, \
[&] (auto value) { \
*currentOutput++ = value; \
@@ -683,10 +684,10 @@ private:
{
auto columnarBatch = rowBatch->TryAsColumnar();
if (!columnarBatch) {
- YT_LOG_DEBUG("Encoding non-columnar batch; running write rows");
+ YT_LOG_DEBUG("Encoding non-columnar batch; running write rows (RowCount: %v)", rowBatch->GetRowCount());
DoWrite(rowBatch->MaterializeRows());
} else {
- YT_LOG_DEBUG("Encoding columnar batch");
+ YT_LOG_DEBUG("Encoding columnar batch (RowCount: %v)", rowBatch->GetRowCount());
Reset();
NumberOfRows_ = rowBatch->GetRowCount();
PrepareColumns(columnarBatch->MaterializeColumns());