diff options
| author | nadya02 <[email protected]> | 2025-02-07 14:26:51 +0300 |
|---|---|---|
| committer | nadya02 <[email protected]> | 2025-02-07 14:55:04 +0300 |
| commit | 68b05620c95278208a216afdfec8ca8f7c2fa9c3 (patch) | |
| tree | f83d3ee9aa4f99e74544cb89a11bf3848e612429 | |
| parent | 2dc0b90341ba0adaa79f54a751f4e9fb8c1f2636 (diff) | |
YT-23828: Support native date types in arrow parser
* Changelog entry
Type: feature
Component: proxy
Support native date types in arrow parser.
commit_hash:dfeadd91a3b6aff92584b2334068fee20b51163d
| -rw-r--r-- | yt/yt/library/formats/arrow_parser.cpp | 418 | ||||
| -rw-r--r-- | yt/yt/library/formats/unittests/arrow_parser_ut.cpp | 129 |
2 files changed, 535 insertions, 12 deletions
diff --git a/yt/yt/library/formats/arrow_parser.cpp b/yt/yt/library/formats/arrow_parser.cpp index 7e73bd77988..4db86df939c 100644 --- a/yt/yt/library/formats/arrow_parser.cpp +++ b/yt/yt/library/formats/arrow_parser.cpp @@ -36,6 +36,39 @@ namespace { //////////////////////////////////////////////////////////////////////////////// +static constexpr i64 SecondsToMicroCoefficient = 1'000'000; +static constexpr i64 MilliToMicroCoefficient = 1'000; +static constexpr i64 MicroToNanoCoefficient = 1'000; +static constexpr i64 SecondsToMilliCoefficient = 1'000; + +//////////////////////////////////////////////////////////////////////////////// + +i64 SignedSaturationArithmeticMultiply(i64 lhs, i64 rhs) +{ + if (lhs == 0 || rhs == 0) { + return 0; + } + + i64 sign = 1; + if (lhs < 0) { + sign = -sign; + } + if (rhs < 0) { + sign = -sign; + } + + i64 result; + if (__builtin_mul_overflow(lhs, rhs, &result)) { + if (sign < 0) { + return std::numeric_limits<i64>::min(); + } else { + return std::numeric_limits<i64>::max(); + } + } else { + return result; + } +} + void ThrowOnError(const arrow::Status& status) { if (!status.ok()) { @@ -63,11 +96,43 @@ void CheckArrowTypeMatch( { switch (columnType) { case ESimpleLogicalValueType::Int8: + CheckArrowType( + columnType, + { + arrow::Type::INT8, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Int16: + CheckArrowType( + columnType, + { + arrow::Type::INT8, + arrow::Type::INT16, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; case ESimpleLogicalValueType::Int32: - case ESimpleLogicalValueType::Int64: + CheckArrowType( + columnType, + { + arrow::Type::INT8, + arrow::Type::INT16, + arrow::Type::INT32, + arrow::Type::DATE32, + arrow::Type::TIME32, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; - case ESimpleLogicalValueType::Interval: + case ESimpleLogicalValueType::Int64: CheckArrowType( columnType, { @@ -86,9 +151,56 @@ void CheckArrowTypeMatch( arrowTypeId); break; + case ESimpleLogicalValueType::Interval: + CheckArrowType( + columnType, + { + arrow::Type::INT8, + arrow::Type::INT16, + arrow::Type::INT32, + arrow::Type::INT64, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Uint8: + CheckArrowType( + columnType, + { + arrow::Type::UINT8, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Uint16: + CheckArrowType( + columnType, + { + arrow::Type::UINT8, + arrow::Type::UINT16, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Uint32: + CheckArrowType( + columnType, + { + arrow::Type::UINT8, + arrow::Type::UINT16, + arrow::Type::UINT32, + arrow::Type::DICTIONARY + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Uint64: CheckArrowType( columnType, @@ -104,21 +216,67 @@ void CheckArrowTypeMatch( break; case ESimpleLogicalValueType::Date: + CheckArrowType( + columnType, + { + arrow::Type::UINT32, + arrow::Type::DICTIONARY, + arrow::Type::DATE32, + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Datetime: - case ESimpleLogicalValueType::Timestamp: CheckArrowType( columnType, { arrow::Type::UINT32, arrow::Type::UINT64, arrow::Type::DICTIONARY, + arrow::Type::DATE64, + }, + arrowTypeName, + arrowTypeId); + break; + + case ESimpleLogicalValueType::Timestamp: + CheckArrowType( + columnType, + { + arrow::Type::UINT64, + arrow::Type::DICTIONARY, + arrow::Type::TIMESTAMP, }, arrowTypeName, arrowTypeId); break; case ESimpleLogicalValueType::Date32: + CheckArrowType( + columnType, + { + arrow::Type::INT32, + arrow::Type::DICTIONARY, + arrow::Type::DATE32, + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Datetime64: + CheckArrowType( + columnType, + { + arrow::Type::INT32, + arrow::Type::INT64, + arrow::Type::DICTIONARY, + arrow::Type::DATE64, + }, + arrowTypeName, + arrowTypeId); + break; + case ESimpleLogicalValueType::Timestamp64: CheckArrowType( columnType, @@ -126,6 +284,7 @@ void CheckArrowTypeMatch( arrow::Type::INT32, arrow::Type::INT64, arrow::Type::DICTIONARY, + arrow::Type::TIMESTAMP, }, arrowTypeName, arrowTypeId); @@ -287,6 +446,81 @@ TStringBuf SerializeDecimalBinary(TStringBuf value, int precision, char* buffer, //////////////////////////////////////////////////////////////////////////////// +i64 CheckAndTransformDate(i64 arrowValue, i64 minAllowedDate, i64 maxAllowedDate) +{ + if (arrowValue < minAllowedDate || arrowValue > maxAllowedDate) { + THROW_ERROR_EXCEPTION( + "Arrow date32 value %v is incompatible with the YT date type, value should be in range [%v, %v]", + arrowValue, + minAllowedDate, + maxAllowedDate); + } + return arrowValue; +} + +i64 CheckAndTransformDatetime(i64 arrowValue, i64 minAllowedDate, i64 maxAllowedDate) +{ + auto minarrowValue = SignedSaturationArithmeticMultiply(minAllowedDate, SecondsToMilliCoefficient); + auto maxarrowValue = SignedSaturationArithmeticMultiply(maxAllowedDate, SecondsToMilliCoefficient); + if (arrowValue < minarrowValue || arrowValue > maxarrowValue) { + THROW_ERROR_EXCEPTION( + "Arrow date64 value %v is incompatible with the YT datetime type, value should be in range [%v, %v]", + arrowValue, + minarrowValue, + maxarrowValue); + } + // Сonverting from seconds to milliseconds. + return arrowValue / SecondsToMilliCoefficient; +} + +i64 CheckAndTransformTimestamp(i64 arrowValue, arrow::TimeUnit::type timeUnit, i64 minAllowedTimestamp, i64 maxAllowedTimestamp) +{ + i64 resultValue; + i64 minArrowAllowedTimestamp; + i64 maxArrowAllowedTimestamp; + + switch (timeUnit) { + case arrow::TimeUnit::type::NANO: + resultValue = arrowValue / MicroToNanoCoefficient; + minArrowAllowedTimestamp = SignedSaturationArithmeticMultiply(minAllowedTimestamp, MicroToNanoCoefficient); + maxArrowAllowedTimestamp = SignedSaturationArithmeticMultiply(minAllowedTimestamp, MicroToNanoCoefficient); + break; + + case arrow::TimeUnit::type::SECOND: + resultValue = SignedSaturationArithmeticMultiply(arrowValue, SecondsToMicroCoefficient); + minArrowAllowedTimestamp = minAllowedTimestamp / SecondsToMicroCoefficient; + maxArrowAllowedTimestamp = maxAllowedTimestamp /SecondsToMicroCoefficient; + break; + + case arrow::TimeUnit::type::MILLI: + resultValue = SignedSaturationArithmeticMultiply(arrowValue, MilliToMicroCoefficient); + minArrowAllowedTimestamp = minAllowedTimestamp / MilliToMicroCoefficient; + maxArrowAllowedTimestamp = maxAllowedTimestamp /MilliToMicroCoefficient; + break; + + case arrow::TimeUnit::type::MICRO: + resultValue = arrowValue; + minArrowAllowedTimestamp = minAllowedTimestamp; + maxArrowAllowedTimestamp = maxAllowedTimestamp; + break; + + default: + THROW_ERROR_EXCEPTION("Unexpected arrow time unit %Qv", static_cast<int>(timeUnit)); + } + + if (resultValue < minAllowedTimestamp || resultValue > maxAllowedTimestamp) { + THROW_ERROR_EXCEPTION( + "Arrow timestamp value %v is incompatible with the YT timestamp type, value should be in range [%v, %v]", + arrowValue, + minArrowAllowedTimestamp, + maxArrowAllowedTimestamp); + } + + return resultValue; +} + +//////////////////////////////////////////////////////////////////////////////// + class TArraySimpleVisitor : public arrow::TypeVisitor { @@ -337,17 +571,35 @@ public: arrow::Status Visit(const arrow::Date32Type& /*type*/) override { - return ParseInt64<arrow::Date32Array>(); + if (ColumnType_ && *ColumnType_ == ESimpleLogicalValueType::Date32) { + return ParseDate32<arrow::Date32Array>(); + } else if (ColumnType_ && *ColumnType_ == ESimpleLogicalValueType::Date) { + return ParseDate<arrow::Date32Array>(); + } else { + return ParseInt64<arrow::Date32Array>(); + } } arrow::Status Visit(const arrow::Date64Type& /*type*/) override { - return ParseInt64<arrow::Date64Array>(); + if (ColumnType_ && *ColumnType_ == ESimpleLogicalValueType::Datetime64) { + return ParseDate64<arrow::Date64Array>(); + } else if (ColumnType_ && *ColumnType_ == ESimpleLogicalValueType::Datetime) { + return ParseDatetime<arrow::Date64Array>(); + } else { + return ParseInt64<arrow::Date64Array>(); + } } - arrow::Status Visit(const arrow::TimestampType& /*type*/) override + arrow::Status Visit(const arrow::TimestampType& type) override { - return ParseInt64<arrow::TimestampArray>(); + if (ColumnType_ && *ColumnType_ == ESimpleLogicalValueType::Timestamp64) { + return ParseTimestamp64<arrow::TimestampArray>(type.unit()); + } else if (ColumnType_ && *ColumnType_ == ESimpleLogicalValueType::Timestamp) { + return ParseTimestamp<arrow::TimestampArray>(type.unit()); + } else { + return ParseInt64<arrow::TimestampArray>(); + } } // Unsigned int types. @@ -433,6 +685,72 @@ private: TUnversionedRowValues* RowValues_; template <typename ArrayType> + arrow::Status ParseDate() + { + auto makeUnversionedValue = [] (i64 value, i64 columnId) { + return MakeUnversionedUint64Value( + static_cast<ui64>(CheckAndTransformDate(value, /*minAllowedDate*/ 0, DateUpperBound)), + columnId); + }; + ParseSimpleNumeric<ArrayType, decltype(makeUnversionedValue)>(makeUnversionedValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseDate32() + { + auto makeUnversionedValue = [] (i64 value, i64 columnId) { + return MakeUnversionedInt64Value(CheckAndTransformDate(value, Date32LowerBound, Date32UpperBound), columnId); + }; + ParseSimpleNumeric<ArrayType, decltype(makeUnversionedValue)>(makeUnversionedValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseDatetime() + { + auto makeUnversionedValue = [] (i64 value, i64 columnId) { + return MakeUnversionedUint64Value( + static_cast<ui64>(CheckAndTransformDatetime(value, /*minAllowedDate*/ 0, DatetimeUpperBound)), + columnId); + }; + ParseSimpleNumeric<ArrayType, decltype(makeUnversionedValue)>(makeUnversionedValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseDate64() + { + auto makeUnversionedValue = [] (i64 value, i64 columnId) { + return MakeUnversionedInt64Value(CheckAndTransformDatetime(value, Datetime64LowerBound, DatetimeUpperBound), columnId); + }; + ParseSimpleNumeric<ArrayType, decltype(makeUnversionedValue)>(makeUnversionedValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseTimestamp(arrow::TimeUnit::type timeUnit) + { + auto makeUnversionedValue = [timeUnit] (i64 value, i64 columnId) { + return MakeUnversionedUint64Value( + static_cast<ui64>(CheckAndTransformTimestamp(value, timeUnit, /*minAllowedTimestamp*/ 0, TimestampUpperBound)), + columnId); + }; + ParseSimpleNumeric<ArrayType, decltype(makeUnversionedValue)>(makeUnversionedValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseTimestamp64(arrow::TimeUnit::type timeUnit) + { + auto makeUnversionedValue = [timeUnit] (i64 value, i64 columnId) { + return MakeUnversionedInt64Value(CheckAndTransformTimestamp(value, timeUnit, Timestamp64LowerBound, Timestamp64UpperBound), columnId); + }; + ParseSimpleNumeric<ArrayType, decltype(makeUnversionedValue)>(makeUnversionedValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> arrow::Status ParseInt64() { auto makeUnversionedValue = [] (i64 value, i64 columnId) { @@ -610,19 +928,37 @@ public: arrow::Status Visit(const arrow::Date32Type& type) override { CheckArrowTypeMatch(YTType_->AsSimpleTypeRef().GetElement(), type.type_name(), type.id()); - return ParseInt64<arrow::Date32Array>(); + if (YTType_->AsSimpleTypeRef().GetElement() == ESimpleLogicalValueType::Date32) { + return ParseDate32<arrow::Date32Array>(); + } else if (YTType_->AsSimpleTypeRef().GetElement() == ESimpleLogicalValueType::Date) { + return ParseDate<arrow::Date32Array>(); + } else { + return ParseInt64<arrow::Date32Array>(); + } } arrow::Status Visit(const arrow::Date64Type& type) override { CheckArrowTypeMatch(YTType_->AsSimpleTypeRef().GetElement(), type.type_name(), type.id()); - return ParseInt64<arrow::Date64Array>(); + if (YTType_->AsSimpleTypeRef().GetElement()== ESimpleLogicalValueType::Datetime64) { + return ParseDate64<arrow::Date64Array>(); + } else if (YTType_->AsSimpleTypeRef().GetElement() == ESimpleLogicalValueType::Datetime) { + return ParseDatetime<arrow::Date64Array>(); + } else { + return ParseInt64<arrow::Date64Array>(); + } } arrow::Status Visit(const arrow::TimestampType& type) override { CheckArrowTypeMatch(YTType_->AsSimpleTypeRef().GetElement(), type.type_name(), type.id()); - return ParseInt64<arrow::TimestampArray>(); + if (YTType_->AsSimpleTypeRef().GetElement() == ESimpleLogicalValueType::Timestamp64) { + return ParseTimestamp64<arrow::TimestampArray>(type.unit()); + } else if (YTType_->AsSimpleTypeRef().GetElement() == ESimpleLogicalValueType::Timestamp) { + return ParseTimestamp<arrow::TimestampArray>(type.unit()); + } else { + return ParseInt64<arrow::TimestampArray>(); + } } // Unsigned integer types. @@ -812,6 +1148,66 @@ private: return arrow::Status::OK(); } + template <typename ArrayType> + arrow::Status ParseDate() + { + auto writeNumericValue = [] (NYson::TCheckedInDebugYsonTokenWriter* writer, i64 value) { + writer->WriteBinaryUint64(CheckAndTransformDate(value, /*minAllowedDate*/ 0, DateUpperBound)); + }; + ParseComplexNumeric<ArrayType, decltype(writeNumericValue)>(writeNumericValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseDate32() + { + auto writeNumericValue = [] (NYson::TCheckedInDebugYsonTokenWriter* writer, i64 value) { + writer->WriteBinaryInt64(CheckAndTransformDate(value, Date32LowerBound, Date32UpperBound)); + }; + ParseComplexNumeric<ArrayType, decltype(writeNumericValue)>(writeNumericValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseDatetime() + { + auto writeNumericValue = [] (NYson::TCheckedInDebugYsonTokenWriter* writer, i64 value) { + writer->WriteBinaryUint64(CheckAndTransformDatetime(value, /*minAllowedDate*/ 0, DatetimeUpperBound)); + }; + ParseComplexNumeric<ArrayType, decltype(writeNumericValue)>(writeNumericValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseDate64() + { + auto writeNumericValue = [] (NYson::TCheckedInDebugYsonTokenWriter* writer, i64 value) { + writer->WriteBinaryInt64(CheckAndTransformDatetime(value, Datetime64LowerBound, Datetime64UpperBound)); + }; + ParseComplexNumeric<ArrayType, decltype(writeNumericValue)>(writeNumericValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseTimestamp(arrow::TimeUnit::type timeUnit) + { + auto writeNumericValue = [timeUnit] (NYson::TCheckedInDebugYsonTokenWriter* writer, i64 value) { + writer->WriteBinaryUint64(CheckAndTransformTimestamp(value, timeUnit, /*minAllowedTimestamp*/ 0, TimestampUpperBound)); + }; + ParseComplexNumeric<ArrayType, decltype(writeNumericValue)>(writeNumericValue); + return arrow::Status::OK(); + } + + template <typename ArrayType> + arrow::Status ParseTimestamp64(arrow::TimeUnit::type timeUnit) + { + auto writeNumericValue = [timeUnit] (NYson::TCheckedInDebugYsonTokenWriter* writer, i64 value) { + writer->WriteBinaryInt64(CheckAndTransformTimestamp(value, timeUnit, Timestamp64LowerBound, Timestamp64UpperBound)); + }; + ParseComplexNumeric<ArrayType, decltype(writeNumericValue)>(writeNumericValue); + return arrow::Status::OK(); + } + arrow::Status ParseList() { if (YTType_->GetMetatype() != ELogicalMetatype::List) { @@ -1108,7 +1504,6 @@ void PrepareArray( rowsValues, columnIndex, columnId); - break; case ELogicalMetatype::List: case ELogicalMetatype::Dict: @@ -1127,7 +1522,6 @@ void PrepareArray( rowsValues, columnIndex, columnId); - break; case ELogicalMetatype::Tagged: // Denullified type should not contain tagged type. diff --git a/yt/yt/library/formats/unittests/arrow_parser_ut.cpp b/yt/yt/library/formats/unittests/arrow_parser_ut.cpp index 852323af189..e8a4c1c6532 100644 --- a/yt/yt/library/formats/unittests/arrow_parser_ut.cpp +++ b/yt/yt/library/formats/unittests/arrow_parser_ut.cpp @@ -273,6 +273,70 @@ std::string MakeStructArrow(const std::vector<std::string>& stringData, const st return MakeOutputFromRecordBatch(recordBatch); } +std::string MakeDateArrow( + const std::vector<i32>& date32Column, + const std::vector<i64>& date64Column, + const std::vector<i64>& timestampColumn) +{ + arrow::Date32Builder date32Builder; + + for (const auto& value : date32Column) { + Verify(date32Builder.Append(value)); + } + + auto date32Array = date32Builder.Finish(); + + arrow::Date64Builder date64Builder; + + for (const auto& value : date64Column) { + Verify(date64Builder.Append(value)); + } + + auto date64Array = date64Builder.Finish(); + + arrow::TimestampBuilder timestampBuilder(arrow::timestamp(arrow::TimeUnit::TimeUnit::MICRO), arrow::default_memory_pool()); + + for (const auto& value : timestampColumn) { + Verify(timestampBuilder.Append(value)); + } + + auto timestampArray = timestampBuilder.Finish(); + + auto arrowSchema = arrow::schema({ + arrow::field("date", arrow::date32()), + arrow::field("datetime", arrow::date64()), + arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::MICRO)), + }); + std::vector<std::shared_ptr<arrow::Array>> columns = {*date32Array, *date64Array, *timestampArray}; + auto recordBatch = arrow::RecordBatch::Make(arrowSchema, columns[0]->length(), columns); + return MakeOutputFromRecordBatch(recordBatch); +} + +std::string MakeDatetimeListArrow(const std::vector<std::vector<i64>>& date64Column) +{ + auto* pool = arrow::default_memory_pool(); + + auto valueBuilder = std::make_shared<arrow::Date64Builder>(pool); + auto listBuilder = std::make_unique<arrow::ListBuilder>(pool, valueBuilder); + + for (const auto& list : date64Column) { + Verify(listBuilder->Append()); + for (const auto& value : list) { + Verify(valueBuilder->Append(value)); + } + } + + auto arrowSchema = arrow::schema({arrow::field("list", listBuilder->type())}); + + std::shared_ptr<arrow::Array> listArray; + Verify(listBuilder->Finish(&listArray)); + std::vector<std::shared_ptr<arrow::Array>> columns = {listArray}; + + auto recordBatch = arrow::RecordBatch::Make(arrowSchema, columns[0]->length(), columns); + + return MakeOutputFromRecordBatch(recordBatch); +} + std::string MakeDecimalArrows(std::vector<TString> values, std::vector<std::tuple<int, int, int>> columnParameters) { auto* pool = arrow::default_memory_pool(); @@ -652,6 +716,71 @@ TEST(TArrowParserTest, DecimalVariousPrecisions) ASSERT_EQ(expectedValues_76_3, collectStrings("decimal256_76_3")); } +TEST(TArrowParserTest, Datetime) +{ + auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{ + TColumnSchema("date", ESimpleLogicalValueType::Date), + TColumnSchema("datetime", ESimpleLogicalValueType::Datetime), + TColumnSchema("timestamp", ESimpleLogicalValueType::Timestamp), + }); + + TCollectingValueConsumer collectedRows(tableSchema); + + auto parser = CreateParserForArrow(&collectedRows); + + parser->Read(MakeDateArrow({18367}, {1586966302000}, {1586966302504185})); + parser->Finish(); + + ASSERT_EQ(collectedRows.Size(), 1); + + ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "date")), 18367u); + ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "datetime")), 1586966302u); + ASSERT_EQ(GetUint64(collectedRows.GetRowValue(0, "timestamp")), 1586966302504185u); +} + +TEST(TArrowParserTest, Datetime64) +{ + auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{ + TColumnSchema("date", ESimpleLogicalValueType::Date32), + TColumnSchema("datetime", ESimpleLogicalValueType::Datetime64), + TColumnSchema("timestamp", ESimpleLogicalValueType::Timestamp64), + }); + + TCollectingValueConsumer collectedRows(tableSchema); + + auto parser = CreateParserForArrow(&collectedRows); + + parser->Read(MakeDateArrow({-18367}, {-1586966302000}, {-1586966302504185})); + parser->Finish(); + + ASSERT_EQ(collectedRows.Size(), 1); + + ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "date")), -18367); + ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "datetime")), -1586966302); + ASSERT_EQ(GetInt64(collectedRows.GetRowValue(0, "timestamp")), -1586966302504185); +} + +TEST(TArrowParserTest, ListOfDatetimes) +{ + auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{ + TColumnSchema("list", ListLogicalType(SimpleLogicalType(ESimpleLogicalValueType::Datetime64))), + }); + + TCollectingValueConsumer collectedRows(tableSchema); + + auto parser = CreateParserForArrow(&collectedRows); + + auto data = MakeDatetimeListArrow({{18367000, 1586966302000}, {}}); + parser->Read(data); + parser->Finish(); + + auto firstNode = GetComposite(collectedRows.GetRowValue(0, "list")); + ASSERT_EQ(ConvertToYsonTextStringStable(firstNode), "[18367;1586966302;]"); + + auto secondNode = GetComposite(collectedRows.GetRowValue(1, "list")); + ASSERT_EQ(ConvertToYsonTextStringStable(secondNode), "[]"); +} + TEST(TArrowParserTest, ListOfDecimals) { auto tableSchema = New<TTableSchema>(std::vector<TColumnSchema>{ |
