diff options
| author | nadya02 <[email protected]> | 2025-04-30 17:42:56 +0300 |
|---|---|---|
| committer | nadya02 <[email protected]> | 2025-04-30 17:58:21 +0300 |
| commit | 188632f29527a147511b1e223d22a03a509ea49e (patch) | |
| tree | 5f939cc253b20d954db634995b678af5d2a1ceda | |
| parent | 816a0f4682fac9c75accacdd34ccca3d816e8761 (diff) | |
YT-24426: Fix optional date
* Changelog entry
Type: fix
Component: proxy
Fix reading of tables with date-type columns in the Arrow format.
commit_hash:2b6c31c267c7bc3b3dba247387edce1713d3625a
| -rw-r--r-- | yt/yt/client/table_client/columnar-inl.h | 50 | ||||
| -rw-r--r-- | yt/yt/client/table_client/columnar.cpp | 17 | ||||
| -rw-r--r-- | yt/yt/client/table_client/columnar.h | 1 | ||||
| -rw-r--r-- | yt/yt/client/table_client/helpers.cpp | 18 | ||||
| -rw-r--r-- | yt/yt/client/table_client/helpers.h | 6 | ||||
| -rw-r--r-- | yt/yt/library/formats/arrow_writer.cpp | 30 |
6 files changed, 100 insertions, 22 deletions
diff --git a/yt/yt/client/table_client/columnar-inl.h b/yt/yt/client/table_client/columnar-inl.h index 64e7a91426e..ddb4f2c7058 100644 --- a/yt/yt/client/table_client/columnar-inl.h +++ b/yt/yt/client/table_client/columnar-inl.h @@ -4,6 +4,8 @@ #include "columnar.h" #endif +#include "helpers.h" + #include <library/cpp/yt/coding/zig_zag.h> namespace NYT::NTableClient { @@ -67,6 +69,7 @@ void DecodeVectorRleImpl( ui64 baseValue, TRange<ui32> dictionaryIndexes, TRange<ui64> rleIndexes, + TRef bitmap, TGetter getter, TConsumer consumer) { @@ -81,17 +84,30 @@ void DecodeVectorRleImpl( break; } decltype(getter(0)) currentValue; + bool isNull = false; if constexpr(WithDictionary) { auto dictionaryIndex = dictionaryIndexes[currentRleIndex]; if (dictionaryIndex == 0) { currentValue = {}; } else { - currentValue = getter(dictionaryIndex - 1); + if (bitmap && GetBit(bitmap, dictionaryIndex - 1)) { + isNull = true; + } else { + currentValue = getter(dictionaryIndex - 1); + } + } } else { - currentValue = getter(currentRleIndex); + if (bitmap && GetBit(bitmap, currentRleIndex)) { + isNull = true; + } else { + currentValue = getter(currentRleIndex); + } + } + currentDecodedValue = {}; + if (!isNull) { + currentDecodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(currentValue, baseValue); } - currentDecodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(currentValue, baseValue); ++currentRleIndex; thresholdIndex = currentRleIndex < std::ssize(rleIndexes) ? 
std::min(static_cast<i64>(rleIndexes[currentRleIndex]), endIndex) @@ -131,22 +147,36 @@ void DecodeVectorDirectImpl( i64 endIndex, ui64 baseValue, TRange<ui32> dictionaryIndexes, + TRef bitmap, TGetter getter, TConsumer consumer) { for (i64 index = startIndex; index < endIndex; ++index) { + bool isNull = false; decltype(getter(0)) value; if constexpr(WithDictionary) { auto dictionaryIndex = dictionaryIndexes[index]; if (dictionaryIndex == 0) { value = {}; } else { - value = getter(dictionaryIndex - 1); + if (bitmap && GetBit(bitmap, dictionaryIndex - 1)) { + isNull = true; + } else { + value = getter(dictionaryIndex - 1); + } } } else { - value = getter(index); + if (bitmap && GetBit(bitmap, index)) { + isNull = true; + } else { + value = getter(index); + } } - auto decodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(value, baseValue); + decltype(getter(0)) decodedValue = {}; + if (!isNull) { + decodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(value, baseValue); + } + consumer(decodedValue); } } @@ -172,6 +202,7 @@ void DecodeVectorImpl( ui64 baseValue, TRange<ui32> dictionaryIndexes, TRange<ui64> rleIndexes, + TRef bitmap, TGetter getter, TConsumer consumer) { @@ -182,6 +213,7 @@ void DecodeVectorImpl( baseValue, dictionaryIndexes, rleIndexes, + bitmap, std::forward<TGetter>(getter), std::forward<TConsumer>(consumer)); } else { @@ -190,6 +222,7 @@ void DecodeVectorImpl( endIndex, baseValue, dictionaryIndexes, + bitmap, std::forward<TGetter>(getter), std::forward<TConsumer>(consumer)); } @@ -258,6 +291,7 @@ void DecodeVector( bool zigZagEncoded, TRange<ui32> dictionaryIndexes, TRange<ui64> rleIndexes, + TRef bitmap, TGetter getter, TConsumer consumer) { @@ -271,6 +305,7 @@ void DecodeVector( baseValue, \ dictionaryIndexes, \ rleIndexes, \ + bitmap, \ std::forward<TGetter>(getter), \ std::forward<TConsumer>(consumer)); @@ -324,6 +359,7 @@ void DecodeIntegerVector( bool zigZagEncoded, TRange<ui32> dictionaryIndexes, TRange<ui64> rleIndexes, 
+ TRef bitmap, TFetcher fetcher, TConsumer consumer) { @@ -334,6 +370,7 @@ void DecodeIntegerVector( zigZagEncoded, dictionaryIndexes, rleIndexes, + bitmap, std::forward<TFetcher>(fetcher), std::forward<TConsumer>(consumer)); } @@ -358,6 +395,7 @@ void DecodeRawVector( false, dictionaryIndexes, rleIndexes, + /*bitmap*/ {}, std::forward<TFetcher>(fetcher), std::forward<TConsumer>(consumer)); } diff --git a/yt/yt/client/table_client/columnar.cpp b/yt/yt/client/table_client/columnar.cpp index 5cfc89f80f5..dfa27d18a3d 100644 --- a/yt/yt/client/table_client/columnar.cpp +++ b/yt/yt/client/table_client/columnar.cpp @@ -1,4 +1,5 @@ #include "columnar.h" +#include "helpers.h" #include <yt/yt/library/numeric/algorithm_helpers.h> @@ -128,22 +129,6 @@ void CopyBitmapRangeToBitmapImpl( } } -bool GetBit(TRef bitmap, i64 index) -{ - return (bitmap[index >> 3] & (1U << (index & 7))) != 0; -} - -void SetBit(TMutableRef bitmap, i64 index, bool value) -{ - auto& byte = bitmap[index >> 3]; - auto mask = (1U << (index & 7)); - if (value) { - byte |= mask; - } else { - byte &= ~mask; - } -} - template <class F> void BuildBitmapFromRleImpl( TRange<ui64> rleIndexes, diff --git a/yt/yt/client/table_client/columnar.h b/yt/yt/client/table_client/columnar.h index 94958b5d49f..442284efc28 100644 --- a/yt/yt/client/table_client/columnar.h +++ b/yt/yt/client/table_client/columnar.h @@ -211,6 +211,7 @@ void DecodeIntegerVector( bool zigZagEncoded, TRange<ui32> dictionaryIndexes, TRange<ui64> rleIndexes, + TRef bitmap, TFetcher fetcher, TConsumer consumer); diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp index b2eabd6d0aa..10755ecbd09 100644 --- a/yt/yt/client/table_client/helpers.cpp +++ b/yt/yt/client/table_client/helpers.cpp @@ -1669,4 +1669,22 @@ TUnversionedValueRangeTruncationResult TruncateUnversionedValues( //////////////////////////////////////////////////////////////////////////////// +bool GetBit(TRef bitmap, i64 index) +{ + return 
(bitmap[index >> 3] & (1U << (index & 7))) != 0; +} + +void SetBit(TMutableRef bitmap, i64 index, bool value) +{ + auto& byte = bitmap[index >> 3]; + auto mask = (1U << (index & 7)); + if (value) { + byte |= mask; + } else { + byte &= ~mask; + } +} + +//////////////////////////////////////////////////////////////////////////////// + } // namespace NYT::NTableClient diff --git a/yt/yt/client/table_client/helpers.h b/yt/yt/client/table_client/helpers.h index 3b85e1edfb0..6ea310ac9b5 100644 --- a/yt/yt/client/table_client/helpers.h +++ b/yt/yt/client/table_client/helpers.h @@ -392,6 +392,12 @@ TUnversionedValueRangeTruncationResult TruncateUnversionedValues(TUnversionedVal //////////////////////////////////////////////////////////////////////////////// +bool GetBit(TRef bitmap, i64 index); + +void SetBit(TMutableRef bitmap, i64 index, bool value); + +//////////////////////////////////////////////////////////////////////////////// + } // namespace NYT::NTableClient #define HELPERS_INL_H_ diff --git a/yt/yt/library/formats/arrow_writer.cpp b/yt/yt/library/formats/arrow_writer.cpp index 377ecc8634a..5c5483491a2 100644 --- a/yt/yt/library/formats/arrow_writer.cpp +++ b/yt/yt/library/formats/arrow_writer.cpp @@ -171,6 +171,10 @@ int ExtractTableIndexFromColumn(const TBatchColumn* column) const auto* valueColumn = column->Rle->ValueColumn; auto values = valueColumn->GetTypedValues<ui64>(); + TRef nullBitmap; + if (valueColumn->NullBitmap) { + nullBitmap = valueColumn->NullBitmap->Data; + } // Expecting only one element. 
YT_VERIFY(values.size() == 1); @@ -187,12 +191,14 @@ int ExtractTableIndexFromColumn(const TBatchColumn* column) valueColumn->Values->ZigZagEncoded, TRange<ui32>(), rleIndexes, + nullBitmap, [&] (auto index) { return values[index]; }, [&] (auto value) { tableIndex = value; }); + return tableIndex; } @@ -497,6 +503,11 @@ void SerializeIntegerColumn( auto startIndex = column->StartIndex; + TRef nullBitmap; + if (valueColumn->NullBitmap) { + nullBitmap = valueColumn->NullBitmap->Data; + } + switch (simpleType) { #define XX(cppType, ytType) \ case ESimpleLogicalValueType::ytType: { \ @@ -509,6 +520,7 @@ void SerializeIntegerColumn( valueColumn->Values->ZigZagEncoded, \ TRange<ui32>(), \ rleIndexes, \ + nullBitmap, \ [&] (auto index) { \ return values[index]; \ }, \ @@ -565,6 +577,11 @@ void SerializeDateColumn( ? column->GetTypedValues<ui64>() : TRange<ui64>(); + TRef nullBitmap; + if (valueColumn->NullBitmap) { + nullBitmap = valueColumn->NullBitmap->Data; + } + auto startIndex = column->StartIndex; auto dstValues = GetTypedValues<i32>(dstRef); @@ -576,6 +593,7 @@ void SerializeDateColumn( valueColumn->Values->ZigZagEncoded, TRange<ui32>(), rleIndexes, + nullBitmap, [&] (auto index) { return values[index]; }, @@ -616,6 +634,11 @@ void SerializeDatetimeColumn( ? column->GetTypedValues<ui64>() : TRange<ui64>(); + TRef nullBitmap; + if (valueColumn->NullBitmap) { + nullBitmap = valueColumn->NullBitmap->Data; + } + auto startIndex = column->StartIndex; auto dstValues = GetTypedValues<i64>(dstRef); @@ -627,6 +650,7 @@ void SerializeDatetimeColumn( valueColumn->Values->ZigZagEncoded, TRange<ui32>(), rleIndexes, + nullBitmap, [&] (auto index) { return values[index]; }, @@ -666,6 +690,11 @@ void SerializeTimestampColumn( ? 
column->GetTypedValues<ui64>() : TRange<ui64>(); + TRef nullBitmap; + if (valueColumn->NullBitmap) { + nullBitmap = valueColumn->NullBitmap->Data; + } + auto startIndex = column->StartIndex; auto dstValues = GetTypedValues<i64>(dstRef); @@ -677,6 +706,7 @@ void SerializeTimestampColumn( valueColumn->Values->ZigZagEncoded, TRange<ui32>(), rleIndexes, + nullBitmap, [&] (auto index) { return values[index]; }, |
