about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ivanmorozov <ivanmorozov@yandex-team.com> 2023-06-29 19:55:01 +0300
committer ivanmorozov <ivanmorozov@yandex-team.com> 2023-06-29 19:55:01 +0300
commit 48ac40272844f8e875067d7e785b879304bdb9ce (patch)
tree 35851f4513c4a201dcbd6c1a43d47deef82efdb6
parent 0da132170af57898a5d97bd0135546bbc28e307a (diff)
download ydb-48ac40272844f8e875067d7e785b879304bdb9ce.tar.gz
filler for timestamp and tests
-rw-r--r-- ydb/core/formats/arrow/simple_builder/array.h 20
-rw-r--r-- ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt 1
-rw-r--r-- ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt 1
-rw-r--r-- ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt 1
-rw-r--r-- ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt 1
-rw-r--r-- ydb/core/formats/arrow/ut/ut_size_calcer.cpp 44
-rw-r--r-- ydb/core/formats/arrow/ut/ya.make 1
7 files changed, 68 insertions, 1 deletions
diff --git a/ydb/core/formats/arrow/simple_builder/array.h b/ydb/core/formats/arrow/simple_builder/array.h
index 87a81a6c37c..5cca878b03b 100644
--- a/ydb/core/formats/arrow/simple_builder/array.h
+++ b/ydb/core/formats/arrow/simple_builder/array.h
@@ -25,6 +25,24 @@ public:
}
};
+// Factory for the arrow builder that TSimpleArrayConstructor uses for a filler's value type TValue.
+// Generic case: the builder arrow::TypeTraits maps to TValue, default-constructed.
+template <class TValue>
+class TFillerBuilderConstructor {
+public:
+    using TBuilder = typename arrow::TypeTraits<TValue>::BuilderType;
+    static TBuilder Construct() { return TBuilder(); }
+};
+
+// Timestamp case: the builder is created from an explicit timestamp type (microsecond unit)
+// and the default memory pool instead of being default-constructed.
+template <>
+class TFillerBuilderConstructor<arrow::TimestampType> {
+public:
+    using TBuilder = arrow::TypeTraits<arrow::TimestampType>::BuilderType;
+    static TBuilder Construct() { return TBuilder(arrow::timestamp(arrow::TimeUnit::MICRO), arrow::default_memory_pool()); }
+};
+
template <class TFiller>
class TSimpleArrayConstructor: public IArrayBuilder {
private:
@@ -33,7 +51,7 @@ private:
const TFiller Filler;
protected:
virtual std::shared_ptr<arrow::Array> DoBuildArray(const ui32 recordsCount) const override {
- TBuilder fBuilder = TBuilder();
+ TBuilder fBuilder = TFillerBuilderConstructor<typename TFiller::TValue>::Construct();
Y_VERIFY(fBuilder.Reserve(recordsCount).ok());
for (ui32 i = 0; i < recordsCount; ++i) {
Y_VERIFY(fBuilder.Append(Filler.GetValue(i)).ok());
diff --git a/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt
index f77bf86c419..8bc68ca411b 100644
--- a/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/formats/arrow/ut/CMakeLists.darwin-x86_64.txt
@@ -39,6 +39,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp
)
set_property(
TARGET
diff --git a/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt b/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt
index ad981e1dca2..2694f81d92f 100644
--- a/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/formats/arrow/ut/CMakeLists.linux-aarch64.txt
@@ -42,6 +42,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp
)
set_property(
TARGET
diff --git a/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt b/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt
index de6ae69716f..cbe7a35919d 100644
--- a/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/formats/arrow/ut/CMakeLists.linux-x86_64.txt
@@ -43,6 +43,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp
)
set_property(
TARGET
diff --git a/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt b/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt
index bcbbd96108b..2f1212b5f30 100644
--- a/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/formats/arrow/ut/CMakeLists.windows-x86_64.txt
@@ -32,6 +32,7 @@ target_sources(ydb-core-formats-arrow-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_arrow.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut_program_step.cpp
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_dictionary.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/ut/ut_size_calcer.cpp
)
set_property(
TARGET
diff --git a/ydb/core/formats/arrow/ut/ut_size_calcer.cpp b/ydb/core/formats/arrow/ut/ut_size_calcer.cpp
new file mode 100644
index 00000000000..1b9835d8010
--- /dev/null
+++ b/ydb/core/formats/arrow/ut/ut_size_calcer.cpp
@@ -0,0 +1,44 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <ydb/core/formats/arrow/arrow_helpers.h>
+#include <ydb/core/formats/arrow/serializer/batch_only.h>
+#include <ydb/core/formats/arrow/serializer/full.h>
+#include <ydb/core/formats/arrow/simple_builder/array.h>
+#include <ydb/core/formats/arrow/simple_builder/batch.h>
+#include <ydb/core/formats/arrow/simple_builder/filler.h>
+#include <ydb/core/formats/arrow/dictionary/conversion.h>
+
+Y_UNIT_TEST_SUITE(SizeCalcer) { // GetBatchDataSize checks on single-column batches of 2048 rows
+    using namespace NKikimr::NArrow;
+
+    Y_UNIT_TEST(SimpleStrings) { // plain string column filled by TStringPoolFiller(1024, 512)
+        NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TStringPoolFiller>>("field", NConstruction::TStringPoolFiller(1024, 512));
+        std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048);
+        const auto dataSize = GetBatchDataSize(batch); // computed once; printed for the log, then asserted
+        Cerr << dataSize << Endl;
+        UNIT_ASSERT_VALUES_EQUAL(dataSize, 2048 * 512); // reports both values on mismatch
+    }
+
+    Y_UNIT_TEST(DictionaryStrings) { // same filler, column built by TDictionaryArrayConstructor
+        NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TDictionaryArrayConstructor<NConstruction::TStringPoolFiller>>("field", NConstruction::TStringPoolFiller(1024, 512));
+        std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048);
+        const auto dataSize = GetBatchDataSize(batch);
+        Cerr << dataSize << Endl;
+        UNIT_ASSERT_VALUES_EQUAL(dataSize, 2048 * 512);
+    }
+
+    Y_UNIT_TEST(SimpleInt64) { // Int64 column from TIntSeqFiller: 8 bytes expected per row
+        NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TIntSeqFiller<arrow::Int64Type>>>("field");
+        std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048);
+        const auto dataSize = GetBatchDataSize(batch);
+        Cerr << dataSize << Endl;
+        UNIT_ASSERT_VALUES_EQUAL(dataSize, 2048 * 8);
+    }
+
+    Y_UNIT_TEST(SimpleTimestamp) { // Timestamp column from TIntSeqFiller: 8 bytes expected per row
+        NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TIntSeqFiller<arrow::TimestampType>>>("field");
+        std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048);
+        const auto dataSize = GetBatchDataSize(batch);
+        Cerr << dataSize << Endl;
+        UNIT_ASSERT_VALUES_EQUAL(dataSize, 2048 * 8);
+    }
+};
diff --git a/ydb/core/formats/arrow/ut/ya.make b/ydb/core/formats/arrow/ut/ya.make
index cd2531b9723..4f51aac13bc 100644
--- a/ydb/core/formats/arrow/ut/ya.make
+++ b/ydb/core/formats/arrow/ut/ya.make
@@ -26,6 +26,7 @@ SRCS(
ut_arrow.cpp
ut_program_step.cpp
ut_dictionary.cpp
+ ut_size_calcer.cpp
)
END()