diff options
author | nadya73 <[email protected]> | 2023-10-03 22:30:48 +0300 |
---|---|---|
committer | nadya73 <[email protected]> | 2023-10-03 22:53:44 +0300 |
commit | a33fdb9a34581fd124e92535153b1f1fdeca6aaf (patch) | |
tree | a3a0257d9efb0b89379c7ed344abb71b10e180a9 | |
parent | 4275724293f2729fd92a2d2ea602e82c0237d283 (diff) |
Add test for integer column serialization and fix row_index columns
-rw-r--r-- | yt/yt/client/arrow/arrow_row_stream_encoder.cpp | 9 | ||||
-rw-r--r-- | yt/yt/library/formats/arrow_writer.cpp | 13 |
2 files changed, 12 insertions, 10 deletions
diff --git a/yt/yt/client/arrow/arrow_row_stream_encoder.cpp b/yt/yt/client/arrow/arrow_row_stream_encoder.cpp index a0e8f7f5f55..b5988c33687 100644 --- a/yt/yt/client/arrow/arrow_row_stream_encoder.cpp +++ b/yt/yt/client/arrow/arrow_row_stream_encoder.cpp @@ -396,21 +396,22 @@ void SerializeIntegerColumn( ? column->GetTypedValues<ui64>() : TRange<ui64>(); + auto startIndex = column->StartIndex; + switch (simpleType) { #define XX(cppType, ytType) \ case ESimpleLogicalValueType::ytType: { \ auto dstValues = GetTypedValues<cppType>(dstRef); \ auto* currentOutput = dstValues.Begin(); \ DecodeIntegerVector( \ - column->StartIndex, \ - column->StartIndex + column->ValueCount, \ + startIndex, \ + startIndex + column->ValueCount, \ valueColumn->Values->BaseValue, \ valueColumn->Values->ZigZagEncoded, \ TRange<ui32>(), \ rleIndexes, \ [&] (auto index) { \ - YT_VERIFY(index >= column->StartIndex); \ - return values[index - column->StartIndex]; \ + return values[index]; \ }, \ [&] (auto value) { \ *currentOutput++ = value; \ diff --git a/yt/yt/library/formats/arrow_writer.cpp b/yt/yt/library/formats/arrow_writer.cpp index 56bdec02358..cd8e720767b 100644 --- a/yt/yt/library/formats/arrow_writer.cpp +++ b/yt/yt/library/formats/arrow_writer.cpp @@ -399,21 +399,22 @@ void SerializeIntegerColumn( ? column->GetTypedValues<ui64>() : TRange<ui64>(); + auto startIndex = column->StartIndex; + switch (simpleType) { #define XX(cppType, ytType) \ case ESimpleLogicalValueType::ytType: { \ auto dstValues = GetTypedValues<cppType>(dstRef); \ auto* currentOutput = dstValues.Begin(); \ DecodeIntegerVector( \ - column->StartIndex, \ - column->StartIndex + column->ValueCount, \ + startIndex, \ + startIndex + column->ValueCount, \ valueColumn->Values->BaseValue, \ valueColumn->Values->ZigZagEncoded, \ TRange<ui32>(), \ rleIndexes, \ [&] (auto index) { \ - YT_VERIFY(index >= column->StartIndex); \ - return values[index - column->StartIndex]; \ + return values[index]; \ }, \ [&] (auto value) { \ *currentOutput++ = value; \ @@ -683,10 +684,10 @@ private: { auto columnarBatch = rowBatch->TryAsColumnar(); if (!columnarBatch) { - YT_LOG_DEBUG("Encoding non-columnar batch; running write rows"); + YT_LOG_DEBUG("Encoding non-columnar batch; running write rows (RowCount: %v)", rowBatch->GetRowCount()); DoWrite(rowBatch->MaterializeRows()); } else { - YT_LOG_DEBUG("Encoding columnar batch"); + YT_LOG_DEBUG("Encoding columnar batch (RowCount: %v)", rowBatch->GetRowCount()); Reset(); NumberOfRows_ = rowBatch->GetRowCount(); PrepareColumns(columnarBatch->MaterializeColumns()); |