aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ivanmorozov <ivanmorozov@yandex-team.com> 2022-12-21 13:53:02 +0300
committer ivanmorozov <ivanmorozov@yandex-team.com> 2022-12-21 13:53:02 +0300
commit f7bd981e4e46e41709092fe9e7d149e1bc8b7738 (patch)
tree 4dd999b4a53b7c35beebe94c14526d1c7646cee1
parent 1199f4673636d8f4967fd393eb5903f9da6d97fb (diff)
download ydb-f7bd981e4e46e41709092fe9e7d149e1bc8b7738.tar.gz
add test and fix
-rw-r--r-- ydb/core/io_formats/csv_arrow.cpp 12
-rw-r--r-- ydb/core/io_formats/ut_csv.cpp 23
2 files changed, 31 insertions, 4 deletions
diff --git a/ydb/core/io_formats/csv_arrow.cpp b/ydb/core/io_formats/csv_arrow.cpp
index 40a560fb53..7028c62b00 100644
--- a/ydb/core/io_formats/csv_arrow.cpp
+++ b/ydb/core/io_formats/csv_arrow.cpp
@@ -119,14 +119,22 @@ std::shared_ptr<arrow::RecordBatch> TArrowCSV::ConvertColumnTypes(std::shared_pt
arrow::UInt16Builder aBuilder;
Y_VERIFY(aBuilder.Reserve(parsedBatch->num_rows()).ok());
for (long i = 0; i < parsedBatch->num_rows(); ++i) {
- aBuilder.UnsafeAppend(i64Arr->Value(i) / 86400ull);
+ if (i64Arr->IsNull(i)) {
+ Y_VERIFY(aBuilder.AppendNull().ok());
+ } else {
+ aBuilder.UnsafeAppend(i64Arr->Value(i) / 86400ull);
+ }
}
arrResult = aBuilder.Finish();
} else if (originalType->id() == arrow::UInt32Type::type_id) {
arrow::UInt32Builder aBuilder;
Y_VERIFY(aBuilder.Reserve(parsedBatch->num_rows()).ok());
for (long i = 0; i < parsedBatch->num_rows(); ++i) {
- aBuilder.UnsafeAppend(i64Arr->Value(i));
+ if (i64Arr->IsNull(i)) {
+ Y_VERIFY(aBuilder.AppendNull().ok());
+ } else {
+ aBuilder.UnsafeAppend(i64Arr->Value(i));
+ }
}
arrResult = aBuilder.Finish();
} else {
diff --git a/ydb/core/io_formats/ut_csv.cpp b/ydb/core/io_formats/ut_csv.cpp
index 6f2c442867..dd610cb9d8 100644
--- a/ydb/core/io_formats/ut_csv.cpp
+++ b/ydb/core/io_formats/ut_csv.cpp
@@ -79,7 +79,7 @@ TestReadSingleBatch(const TVector<std::pair<TString, NScheme::TTypeInfo>>& colum
Y_UNIT_TEST_SUITE(FormatCSV) {
Y_UNIT_TEST(Instants) {
const TString dateTimeString = "2005-08-09T18:31:42";
- const TString data = "11,12,2013-07-15," + dateTimeString + "," + dateTimeString;
+ const TString data = "11,12,2013-07-15," + dateTimeString + "," + dateTimeString + ",,,";
TVector<std::pair<TString, NScheme::TTypeInfo>> columns;
{
@@ -88,7 +88,10 @@ Y_UNIT_TEST_SUITE(FormatCSV) {
{"timestamp_int", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
{"date", NScheme::TTypeInfo(NScheme::NTypeIds::Date) },
{"datetime", NScheme::TTypeInfo(NScheme::NTypeIds::Datetime)},
- {"timestamp", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)}
+ {"timestamp", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
+ {"date_null", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
+ {"datetime_null", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
+ {"timestamp_null", NScheme::TTypeInfo(NScheme::NTypeIds::Timestamp)},
};
TInstant dtInstant;
Y_VERIFY(TInstant::TryParseIso8601(dateTimeString, dtInstant));
@@ -100,6 +103,10 @@ Y_UNIT_TEST_SUITE(FormatCSV) {
UNIT_ASSERT(!!batch);
UNIT_ASSERT(errorMessage.empty());
+ auto cDateNull = batch->GetColumnByName("date_null");
+ auto cDatetimeNull = batch->GetColumnByName("datetime_null");
+ auto cTimestampNull = batch->GetColumnByName("timestamp_null");
+
auto cDatetimeInt = batch->GetColumnByName("datetime_int");
auto cTimestampInt = batch->GetColumnByName("timestamp_int");
auto cDate = batch->GetColumnByName("date");
@@ -114,6 +121,18 @@ Y_UNIT_TEST_SUITE(FormatCSV) {
Y_VERIFY(batch->num_rows() == 1);
{
+ auto& ui16Column = static_cast<arrow::UInt32Array&>(*cDateNull);
+ Y_VERIFY(ui16Column.IsNull(0));
+ }
+ {
+ auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetimeNull);
+ Y_VERIFY(ui32Column.IsNull(0));
+ }
+ {
+ auto& tsColumn = static_cast<arrow::TimestampArray&>(*cTimestampNull);
+ Y_VERIFY(tsColumn.IsNull(0));
+ }
+ {
auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetimeInt);
Y_VERIFY(ui32Column.Value(0) == 11, "%d", ui32Column.Value(0));
}