diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-06-29 19:55:01 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-06-29 19:55:01 +0300 |
commit | 48ac40272844f8e875067d7e785b879304bdb9ce (patch) | |
tree | 35851f4513c4a201dcbd6c1a43d47deef82efdb6 | |
parent | 0da132170af57898a5d97bd0135546bbc28e307a (diff) | |
download | ydb-48ac40272844f8e875067d7e785b879304bdb9ce.tar.gz |
filler for timestamp and tests
-rw-r--r-- | ydb/core/formats/arrow/simple_builder/array.h | 20 | ||||
-rw-r--r-- | ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/formats/arrow/ut/ut_size_calcer.cpp | 44 | ||||
-rw-r--r-- | ydb/core/formats/arrow/ut/ya.make | 1 |
7 files changed, 68 insertions, 1 deletions
diff --git a/ydb/core/formats/arrow/simple_builder/array.h b/ydb/core/formats/arrow/simple_builder/array.h index 87a81a6c37c..5cca878b03b 100644 --- a/ydb/core/formats/arrow/simple_builder/array.h +++ b/ydb/core/formats/arrow/simple_builder/array.h @@ -25,6 +25,24 @@ public: } }; +template <class TValue> +class TFillerBuilderConstructor { +public: + using TBuilder = typename arrow::TypeTraits<TValue>::BuilderType; + static TBuilder Construct() { + return TBuilder(); + } +}; + +template <> +class TFillerBuilderConstructor<arrow::TimestampType> { +public: + using TBuilder = arrow::TypeTraits<arrow::TimestampType>::BuilderType; + static TBuilder Construct() { + return arrow::TimestampBuilder(arrow::timestamp(arrow::TimeUnit::TimeUnit::MICRO), arrow::default_memory_pool()); + } +}; + template <class TFiller> class TSimpleArrayConstructor: public IArrayBuilder { private: @@ -33,7 +51,7 @@ private: const TFiller Filler; protected: virtual std::shared_ptr<arrow::Array> DoBuildArray(const ui32 recordsCount) const override { - TBuilder fBuilder = TBuilder(); + TBuilder fBuilder = TFillerBuilderConstructor<typename TFiller::TValue>::Construct(); Y_VERIFY(fBuilder.Reserve(recordsCount).ok()); for (ui32 i = 0; i < recordsCount; ++i) { Y_VERIFY(fBuilder.Append(Filler.GetValue(i)).ok()); diff --git a/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt index f77bf86c419..8bc68ca411b 100644 --- a/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt @@ -39,6 +39,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp ) set_property( TARGET diff --git a/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt b/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt index ad981e1dca2..2694f81d92f 100644 --- a/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt @@ -42,6 +42,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp ) set_property( TARGET diff --git a/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt b/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt index de6ae69716f..cbe7a35919d 100644 --- a/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt @@ -43,6 +43,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp ) set_property( TARGET diff --git a/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt b/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt index bcbbd96108b..2f1212b5f30 100644 --- a/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt @@ -32,6 +32,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp ) set_property( TARGET diff --git a/ydb/core/formats/arrow/ut/ut_size_calcer.cpp b/ydb/core/formats/arrow/ut/ut_size_calcer.cpp new file mode 100644 index 00000000000..1b9835d8010 --- /dev/null +++ b/ydb/core/formats/arrow/ut/ut_size_calcer.cpp @@ -0,0 +1,44 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <ydb/core/formats/arrow/arrow_helpers.h> +#include <ydb/core/formats/arrow/serializer/batch_only.h> +#include <ydb/core/formats/arrow/serializer/full.h> +#include <ydb/core/formats/arrow/simple_builder/array.h> +#include <ydb/core/formats/arrow/simple_builder/batch.h> +#include <ydb/core/formats/arrow/simple_builder/filler.h> +#include <ydb/core/formats/arrow/dictionary/conversion.h> + +Y_UNIT_TEST_SUITE(SizeCalcer) { + + using namespace NKikimr::NArrow; + + Y_UNIT_TEST(SimpleStrings) { + NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TStringPoolFiller>>( + "field", NConstruction::TStringPoolFiller(1024, 512)); + std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048); + Cerr << GetBatchDataSize(batch) << Endl; + UNIT_ASSERT(GetBatchDataSize(batch) == 2048 * 512); + } + + Y_UNIT_TEST(DictionaryStrings) { + NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TDictionaryArrayConstructor<NConstruction::TStringPoolFiller>>( + "field", NConstruction::TStringPoolFiller(1024, 512)); + std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048); + Cerr << GetBatchDataSize(batch) << Endl; + UNIT_ASSERT(GetBatchDataSize(batch) == 2048 * 512); + } + + Y_UNIT_TEST(SimpleInt64) { + NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TIntSeqFiller<arrow::Int64Type>>>("field"); + std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048); + Cerr << GetBatchDataSize(batch) << Endl; + UNIT_ASSERT(GetBatchDataSize(batch) == 2048 * 8); + } + + Y_UNIT_TEST(SimpleTimestamp) { + NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TIntSeqFiller<arrow::TimestampType>>>("field"); + std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048); + Cerr << GetBatchDataSize(batch) << Endl; + UNIT_ASSERT(GetBatchDataSize(batch) == 2048 * 8); + } + +}; diff --git a/ydb/core/formats/arrow/ut/ya.make b/ydb/core/formats/arrow/ut/ya.make index cd2531b9723..4f51aac13bc 100644 --- a/ydb/core/formats/arrow/ut/ya.make +++ b/ydb/core/formats/arrow/ut/ya.make @@ -26,6 +26,7 @@ SRCS( ut_arrow.cpp ut_program_step.cpp ut_dictionary.cpp + ut_size_calcer.cpp ) END() |