summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornadya02 <[email protected]>2025-04-30 17:42:56 +0300
committernadya02 <[email protected]>2025-04-30 17:58:21 +0300
commit188632f29527a147511b1e223d22a03a509ea49e (patch)
tree5f939cc253b20d954db634995b678af5d2a1ceda
parent816a0f4682fac9c75accacdd34ccca3d816e8761 (diff)
YT-24426: Fix optional date
* Changelog entry
Type: fix
Component: proxy
Fix reading of tables with date-type columns in Arrow format.
commit_hash:2b6c31c267c7bc3b3dba247387edce1713d3625a
-rw-r--r--yt/yt/client/table_client/columnar-inl.h50
-rw-r--r--yt/yt/client/table_client/columnar.cpp17
-rw-r--r--yt/yt/client/table_client/columnar.h1
-rw-r--r--yt/yt/client/table_client/helpers.cpp18
-rw-r--r--yt/yt/client/table_client/helpers.h6
-rw-r--r--yt/yt/library/formats/arrow_writer.cpp30
6 files changed, 100 insertions, 22 deletions
diff --git a/yt/yt/client/table_client/columnar-inl.h b/yt/yt/client/table_client/columnar-inl.h
index 64e7a91426e..ddb4f2c7058 100644
--- a/yt/yt/client/table_client/columnar-inl.h
+++ b/yt/yt/client/table_client/columnar-inl.h
@@ -4,6 +4,8 @@
#include "columnar.h"
#endif
+#include "helpers.h"
+
#include <library/cpp/yt/coding/zig_zag.h>
namespace NYT::NTableClient {
@@ -67,6 +69,7 @@ void DecodeVectorRleImpl(
ui64 baseValue,
TRange<ui32> dictionaryIndexes,
TRange<ui64> rleIndexes,
+ TRef bitmap,
TGetter getter,
TConsumer consumer)
{
@@ -81,17 +84,30 @@ void DecodeVectorRleImpl(
break;
}
decltype(getter(0)) currentValue;
+ bool isNull = false;
if constexpr(WithDictionary) {
auto dictionaryIndex = dictionaryIndexes[currentRleIndex];
if (dictionaryIndex == 0) {
currentValue = {};
} else {
- currentValue = getter(dictionaryIndex - 1);
+ if (bitmap && GetBit(bitmap, dictionaryIndex - 1)) {
+ isNull = true;
+ } else {
+ currentValue = getter(dictionaryIndex - 1);
+ }
+
}
} else {
- currentValue = getter(currentRleIndex);
+ if (bitmap && GetBit(bitmap, currentRleIndex)) {
+ isNull = true;
+ } else {
+ currentValue = getter(currentRleIndex);
+ }
+ }
+ currentDecodedValue = {};
+ if (!isNull) {
+ currentDecodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(currentValue, baseValue);
}
- currentDecodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(currentValue, baseValue);
++currentRleIndex;
thresholdIndex = currentRleIndex < std::ssize(rleIndexes)
? std::min(static_cast<i64>(rleIndexes[currentRleIndex]), endIndex)
@@ -131,22 +147,36 @@ void DecodeVectorDirectImpl(
i64 endIndex,
ui64 baseValue,
TRange<ui32> dictionaryIndexes,
+ TRef bitmap,
TGetter getter,
TConsumer consumer)
{
for (i64 index = startIndex; index < endIndex; ++index) {
+ bool isNull = false;
decltype(getter(0)) value;
if constexpr(WithDictionary) {
auto dictionaryIndex = dictionaryIndexes[index];
if (dictionaryIndex == 0) {
value = {};
} else {
- value = getter(dictionaryIndex - 1);
+ if (bitmap && GetBit(bitmap, dictionaryIndex - 1)) {
+ isNull = true;
+ } else {
+ value = getter(dictionaryIndex - 1);
+ }
}
} else {
- value = getter(index);
+ if (bitmap && GetBit(bitmap, index)) {
+ isNull = true;
+ } else {
+ value = getter(index);
+ }
}
- auto decodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(value, baseValue);
+ decltype(getter(0)) decodedValue = {};
+ if (!isNull) {
+ decodedValue = TValueDecoder<WithBaseValue, WithZigZag, T>::Run(value, baseValue);
+ }
+
consumer(decodedValue);
}
}
@@ -172,6 +202,7 @@ void DecodeVectorImpl(
ui64 baseValue,
TRange<ui32> dictionaryIndexes,
TRange<ui64> rleIndexes,
+ TRef bitmap,
TGetter getter,
TConsumer consumer)
{
@@ -182,6 +213,7 @@ void DecodeVectorImpl(
baseValue,
dictionaryIndexes,
rleIndexes,
+ bitmap,
std::forward<TGetter>(getter),
std::forward<TConsumer>(consumer));
} else {
@@ -190,6 +222,7 @@ void DecodeVectorImpl(
endIndex,
baseValue,
dictionaryIndexes,
+ bitmap,
std::forward<TGetter>(getter),
std::forward<TConsumer>(consumer));
}
@@ -258,6 +291,7 @@ void DecodeVector(
bool zigZagEncoded,
TRange<ui32> dictionaryIndexes,
TRange<ui64> rleIndexes,
+ TRef bitmap,
TGetter getter,
TConsumer consumer)
{
@@ -271,6 +305,7 @@ void DecodeVector(
baseValue, \
dictionaryIndexes, \
rleIndexes, \
+ bitmap, \
std::forward<TGetter>(getter), \
std::forward<TConsumer>(consumer));
@@ -324,6 +359,7 @@ void DecodeIntegerVector(
bool zigZagEncoded,
TRange<ui32> dictionaryIndexes,
TRange<ui64> rleIndexes,
+ TRef bitmap,
TFetcher fetcher,
TConsumer consumer)
{
@@ -334,6 +370,7 @@ void DecodeIntegerVector(
zigZagEncoded,
dictionaryIndexes,
rleIndexes,
+ bitmap,
std::forward<TFetcher>(fetcher),
std::forward<TConsumer>(consumer));
}
@@ -358,6 +395,7 @@ void DecodeRawVector(
false,
dictionaryIndexes,
rleIndexes,
+ /*bitmap*/ {},
std::forward<TFetcher>(fetcher),
std::forward<TConsumer>(consumer));
}
diff --git a/yt/yt/client/table_client/columnar.cpp b/yt/yt/client/table_client/columnar.cpp
index 5cfc89f80f5..dfa27d18a3d 100644
--- a/yt/yt/client/table_client/columnar.cpp
+++ b/yt/yt/client/table_client/columnar.cpp
@@ -1,4 +1,5 @@
#include "columnar.h"
+#include "helpers.h"
#include <yt/yt/library/numeric/algorithm_helpers.h>
@@ -128,22 +129,6 @@ void CopyBitmapRangeToBitmapImpl(
}
}
-bool GetBit(TRef bitmap, i64 index)
-{
- return (bitmap[index >> 3] & (1U << (index & 7))) != 0;
-}
-
-void SetBit(TMutableRef bitmap, i64 index, bool value)
-{
- auto& byte = bitmap[index >> 3];
- auto mask = (1U << (index & 7));
- if (value) {
- byte |= mask;
- } else {
- byte &= ~mask;
- }
-}
-
template <class F>
void BuildBitmapFromRleImpl(
TRange<ui64> rleIndexes,
diff --git a/yt/yt/client/table_client/columnar.h b/yt/yt/client/table_client/columnar.h
index 94958b5d49f..442284efc28 100644
--- a/yt/yt/client/table_client/columnar.h
+++ b/yt/yt/client/table_client/columnar.h
@@ -211,6 +211,7 @@ void DecodeIntegerVector(
bool zigZagEncoded,
TRange<ui32> dictionaryIndexes,
TRange<ui64> rleIndexes,
+ TRef bitmap,
TFetcher fetcher,
TConsumer consumer);
diff --git a/yt/yt/client/table_client/helpers.cpp b/yt/yt/client/table_client/helpers.cpp
index b2eabd6d0aa..10755ecbd09 100644
--- a/yt/yt/client/table_client/helpers.cpp
+++ b/yt/yt/client/table_client/helpers.cpp
@@ -1669,4 +1669,22 @@ TUnversionedValueRangeTruncationResult TruncateUnversionedValues(
////////////////////////////////////////////////////////////////////////////////
+bool GetBit(TRef bitmap, i64 index)
+{
+ return (bitmap[index >> 3] & (1U << (index & 7))) != 0;
+}
+
+void SetBit(TMutableRef bitmap, i64 index, bool value)
+{
+ auto& byte = bitmap[index >> 3];
+ auto mask = (1U << (index & 7));
+ if (value) {
+ byte |= mask;
+ } else {
+ byte &= ~mask;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
} // namespace NYT::NTableClient
diff --git a/yt/yt/client/table_client/helpers.h b/yt/yt/client/table_client/helpers.h
index 3b85e1edfb0..6ea310ac9b5 100644
--- a/yt/yt/client/table_client/helpers.h
+++ b/yt/yt/client/table_client/helpers.h
@@ -392,6 +392,12 @@ TUnversionedValueRangeTruncationResult TruncateUnversionedValues(TUnversionedVal
////////////////////////////////////////////////////////////////////////////////
+bool GetBit(TRef bitmap, i64 index);
+
+void SetBit(TMutableRef bitmap, i64 index, bool value);
+
+////////////////////////////////////////////////////////////////////////////////
+
} // namespace NYT::NTableClient
#define HELPERS_INL_H_
diff --git a/yt/yt/library/formats/arrow_writer.cpp b/yt/yt/library/formats/arrow_writer.cpp
index 377ecc8634a..5c5483491a2 100644
--- a/yt/yt/library/formats/arrow_writer.cpp
+++ b/yt/yt/library/formats/arrow_writer.cpp
@@ -171,6 +171,10 @@ int ExtractTableIndexFromColumn(const TBatchColumn* column)
const auto* valueColumn = column->Rle->ValueColumn;
auto values = valueColumn->GetTypedValues<ui64>();
+ TRef nullBitmap;
+ if (valueColumn->NullBitmap) {
+ nullBitmap = valueColumn->NullBitmap->Data;
+ }
// Expecting only one element.
YT_VERIFY(values.size() == 1);
@@ -187,12 +191,14 @@ int ExtractTableIndexFromColumn(const TBatchColumn* column)
valueColumn->Values->ZigZagEncoded,
TRange<ui32>(),
rleIndexes,
+ nullBitmap,
[&] (auto index) {
return values[index];
},
[&] (auto value) {
tableIndex = value;
});
+
return tableIndex;
}
@@ -497,6 +503,11 @@ void SerializeIntegerColumn(
auto startIndex = column->StartIndex;
+ TRef nullBitmap;
+ if (valueColumn->NullBitmap) {
+ nullBitmap = valueColumn->NullBitmap->Data;
+ }
+
switch (simpleType) {
#define XX(cppType, ytType) \
case ESimpleLogicalValueType::ytType: { \
@@ -509,6 +520,7 @@ void SerializeIntegerColumn(
valueColumn->Values->ZigZagEncoded, \
TRange<ui32>(), \
rleIndexes, \
+ nullBitmap, \
[&] (auto index) { \
return values[index]; \
}, \
@@ -565,6 +577,11 @@ void SerializeDateColumn(
? column->GetTypedValues<ui64>()
: TRange<ui64>();
+ TRef nullBitmap;
+ if (valueColumn->NullBitmap) {
+ nullBitmap = valueColumn->NullBitmap->Data;
+ }
+
auto startIndex = column->StartIndex;
auto dstValues = GetTypedValues<i32>(dstRef);
@@ -576,6 +593,7 @@ void SerializeDateColumn(
valueColumn->Values->ZigZagEncoded,
TRange<ui32>(),
rleIndexes,
+ nullBitmap,
[&] (auto index) {
return values[index];
},
@@ -616,6 +634,11 @@ void SerializeDatetimeColumn(
? column->GetTypedValues<ui64>()
: TRange<ui64>();
+ TRef nullBitmap;
+ if (valueColumn->NullBitmap) {
+ nullBitmap = valueColumn->NullBitmap->Data;
+ }
+
auto startIndex = column->StartIndex;
auto dstValues = GetTypedValues<i64>(dstRef);
@@ -627,6 +650,7 @@ void SerializeDatetimeColumn(
valueColumn->Values->ZigZagEncoded,
TRange<ui32>(),
rleIndexes,
+ nullBitmap,
[&] (auto index) {
return values[index];
},
@@ -666,6 +690,11 @@ void SerializeTimestampColumn(
? column->GetTypedValues<ui64>()
: TRange<ui64>();
+ TRef nullBitmap;
+ if (valueColumn->NullBitmap) {
+ nullBitmap = valueColumn->NullBitmap->Data;
+ }
+
auto startIndex = column->StartIndex;
auto dstValues = GetTypedValues<i64>(dstRef);
@@ -677,6 +706,7 @@ void SerializeTimestampColumn(
valueColumn->Values->ZigZagEncoded,
TRange<ui32>(),
rleIndexes,
+ nullBitmap,
[&] (auto index) {
return values[index];
},