diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2022-12-21 13:53:02 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2022-12-21 13:53:02 +0300 |
commit | f7bd981e4e46e41709092fe9e7d149e1bc8b7738 (patch) | |
tree | 4dd999b4a53b7c35beebe94c14526d1c7646cee1 | |
parent | 1199f4673636d8f4967fd393eb5903f9da6d97fb (diff) | |
download | ydb-f7bd981e4e46e41709092fe9e7d149e1bc8b7738.tar.gz |
add test and fix
-rw-r--r-- | ydb/core/io_formats/csv_arrow.cpp | 12 | ||||
-rw-r--r-- | ydb/core/io_formats/ut_csv.cpp | 23 |
2 files changed, 31 insertions, 4 deletions
diff --git a/ydb/core/io_formats/csv_arrow.cpp b/ydb/core/io_formats/csv_arrow.cpp
index 40a560fb53..7028c62b00 100644
--- a/ydb/core/io_formats/csv_arrow.cpp
+++ b/ydb/core/io_formats/csv_arrow.cpp
@@ -119,14 +119,22 @@ std::shared_ptr<arrow::RecordBatch> TArrowCSV::ConvertColumnTypes(std::shared_pt
                 arrow::UInt16Builder aBuilder;
                 Y_VERIFY(aBuilder.Reserve(parsedBatch->num_rows()).ok());
                 for (long i = 0; i < parsedBatch->num_rows(); ++i) {
-                    aBuilder.UnsafeAppend(i64Arr->Value(i) / 86400ull);
+                    if (i64Arr->IsNull(i)) {
+                        Y_VERIFY(aBuilder.AppendNull().ok());
+                    } else {
+                        aBuilder.UnsafeAppend(i64Arr->Value(i) / 86400ull);
+                    }
                 }
                 arrResult = aBuilder.Finish();
             } else if (originalType->id() == arrow::UInt32Type::type_id) {
                 arrow::UInt32Builder aBuilder;
                 Y_VERIFY(aBuilder.Reserve(parsedBatch->num_rows()).ok());
                 for (long i = 0; i < parsedBatch->num_rows(); ++i) {
-                    aBuilder.UnsafeAppend(i64Arr->Value(i));
+                    if (i64Arr->IsNull(i)) {
+                        Y_VERIFY(aBuilder.AppendNull().ok());
+                    } else {
+                        aBuilder.UnsafeAppend(i64Arr->Value(i));
+                    }
                 }
                 arrResult = aBuilder.Finish();
             } else {
diff --git a/ydb/core/io_formats/ut_csv.cpp b/ydb/core/io_formats/ut_csv.cpp
index 6f2c442867..dd610cb9d8 100644
--- a/ydb/core/io_formats/ut_csv.cpp
+++ b/ydb/core/io_formats/ut_csv.cpp
@@ -79,7 +79,7 @@ TestReadSingleBatch(const TVector<std::pair<TString, NScheme::TTypeInfo>>& colum
 Y_UNIT_TEST_SUITE(FormatCSV) {
     Y_UNIT_TEST(Instants) {
         const TString dateTimeString = "2005-08-09T18:31:42";
-        const TString data = "11,12,2013-07-15," + dateTimeString + "," + dateTimeString;
+        const TString data = "11,12,2013-07-15," + dateTimeString + "," + dateTimeString + ",,,";
         TVector<std::pair<TString, NScheme::TTypeInfo>> columns;
 
         {
@@ -88,7 +88,10 @@ Y_UNIT_TEST_SUITE(FormatCSV) {
                 {"timestamp_int", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
                 {"date", NScheme::TTypeInfo(NScheme::NTypeIds::Date) },
                 {"datetime", NScheme::TTypeInfo(NScheme::NTypeIds::Datetime)},
-                {"timestamp", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)}
+                {"timestamp", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
+                {"date_null", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
+                {"datetime_null", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
+                {"timestamp_null", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
             };
             TInstant dtInstant;
             Y_VERIFY(TInstant::TryParseIso8601(dateTimeString, dtInstant));
@@ -100,6 +103,10 @@ Y_UNIT_TEST_SUITE(FormatCSV) {
         UNIT_ASSERT(!!batch);
         UNIT_ASSERT(errorMessage.empty());
 
+        auto cDateNull = batch->GetColumnByName("date_null");
+        auto cDatetimeNull = batch->GetColumnByName("datetime_null");
+        auto cTimestampNull = batch->GetColumnByName("timestamp_null");
+
         auto cDatetimeInt = batch->GetColumnByName("datetime_int");
         auto cTimestampInt = batch->GetColumnByName("timestamp_int");
         auto cDate = batch->GetColumnByName("date");
@@ -114,6 +121,18 @@ Y_UNIT_TEST_SUITE(FormatCSV) {
 
         Y_VERIFY(batch->num_rows() == 1);
         {
+            auto& ui16Column = static_cast<arrow::UInt32Array&>(*cDateNull);
+            Y_VERIFY(ui16Column.IsNull(0));
+        }
+        {
+            auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetimeNull);
+            Y_VERIFY(ui32Column.IsNull(0));
+        }
+        {
+            auto& tsColumn = static_cast<arrow::TimestampArray&>(*cTimestampNull);
+            Y_VERIFY(tsColumn.IsNull(0));
+        }
+        {
             auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetimeInt);
             Y_VERIFY(ui32Column.Value(0) == 11, "%d", ui32Column.Value(0));
         }