aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorivanmorozov <ivanmorozov@yandex-team.com>2023-10-01 10:47:51 +0300
committerivanmorozov <ivanmorozov@yandex-team.com>2023-10-01 11:13:29 +0300
commit2e588fa7b028551dfe9aa1d43b28eddec9d5534d (patch)
tree8c4715e3aadf60544d867d2195e89aba324342a6
parent11dd57fd9a283200b4512930ce11839b8dbc0aac (diff)
downloadydb-2e588fa7b028551dfe9aa1d43b28eddec9d5534d.tar.gz
KIKIMR-19218: fix huge memory consumption
additional validation
-rw-r--r--ydb/core/formats/arrow/serializer/full.cpp7
-rw-r--r--ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/core/formats/arrow/switch/switch_type.h36
-rw-r--r--ydb/core/formats/arrow/switch/ya.make1
7 files changed, 31 insertions, 17 deletions
diff --git a/ydb/core/formats/arrow/serializer/full.cpp b/ydb/core/formats/arrow/serializer/full.cpp
index b41b32a2b22..afaabc151b2 100644
--- a/ydb/core/formats/arrow/serializer/full.cpp
+++ b/ydb/core/formats/arrow/serializer/full.cpp
@@ -35,7 +35,12 @@ arrow::Result<std::shared_ptr<arrow::RecordBatch>> TFullDataDeserializer::DoDese
TString TFullDataSerializer::DoSerialize(const std::shared_ptr<arrow::RecordBatch>& batch) const {
TString result;
- result.reserve(64u << 10);
+ {
+ arrow::io::MockOutputStream mock;
+ auto writer = TStatusValidator::GetValid(arrow::ipc::MakeStreamWriter(&mock, batch->schema(), Options));
+ TStatusValidator::Validate(writer->WriteRecordBatch(*batch));
+ result.reserve(mock.GetExtentBytesWritten());
+ }
{
TStringOutputStream stream(&result);
auto writer = TStatusValidator::GetValid(arrow::ipc::MakeStreamWriter(&stream, batch->schema(), Options));
diff --git a/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt b/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt
index fb4dc6e1cc9..4f9ac4adff3 100644
--- a/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt
@@ -13,6 +13,7 @@ target_link_libraries(formats-arrow-switch PUBLIC
yutil
libs-apache-arrow
ydb-core-scheme_types
+ cpp-actors-core
)
target_sources(formats-arrow-switch PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp
diff --git a/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt b/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt
index 6e6f464e616..7eed9a1f2e4 100644
--- a/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt
@@ -14,6 +14,7 @@ target_link_libraries(formats-arrow-switch PUBLIC
yutil
libs-apache-arrow
ydb-core-scheme_types
+ cpp-actors-core
)
target_sources(formats-arrow-switch PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp
diff --git a/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt b/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt
index 6e6f464e616..7eed9a1f2e4 100644
--- a/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt
@@ -14,6 +14,7 @@ target_link_libraries(formats-arrow-switch PUBLIC
yutil
libs-apache-arrow
ydb-core-scheme_types
+ cpp-actors-core
)
target_sources(formats-arrow-switch PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp
diff --git a/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt b/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt
index fb4dc6e1cc9..4f9ac4adff3 100644
--- a/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt
@@ -13,6 +13,7 @@ target_link_libraries(formats-arrow-switch PUBLIC
yutil
libs-apache-arrow
ydb-core-scheme_types
+ cpp-actors-core
)
target_sources(formats-arrow-switch PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp
diff --git a/ydb/core/formats/arrow/switch/switch_type.h b/ydb/core/formats/arrow/switch/switch_type.h
index 7e0f98a5b97..05741a62280 100644
--- a/ydb/core/formats/arrow/switch/switch_type.h
+++ b/ydb/core/formats/arrow/switch/switch_type.h
@@ -1,8 +1,10 @@
#pragma once
-#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h>
-#include <util/system/yassert.h>
#include <ydb/core/scheme_types/scheme_type_info.h>
#include <ydb/core/scheme/scheme_type_id.h>
+#include <ydb/core/formats/arrow/common/validation.h>
+
+#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h>
+#include <util/system/yassert.h>
namespace NKikimr::NArrow {
@@ -196,32 +198,32 @@ template <typename T>
bool Append(arrow::ArrayBuilder& builder, const typename T::c_type& value) {
using TBuilder = typename arrow::TypeTraits<T>::BuilderType;
- auto status = static_cast<TBuilder&>(builder).Append(value);
- return status.ok();
+ TStatusValidator::Validate(static_cast<TBuilder&>(builder).Append(value));
+ return true;
}
template <typename T>
bool Append(arrow::ArrayBuilder& builder, arrow::util::string_view value) {
using TBuilder = typename arrow::TypeTraits<T>::BuilderType;
- auto status = static_cast<TBuilder&>(builder).Append(value);
- return status.ok();
+ TStatusValidator::Validate(static_cast<TBuilder&>(builder).Append(value));
+ return true;
}
template <typename T>
bool Append(arrow::ArrayBuilder& builder, const typename T::c_type* values, size_t size) {
using TBuilder = typename arrow::NumericBuilder<T>;
- auto status = static_cast<TBuilder&>(builder).AppendValues(values, size);
- return status.ok();
+ TStatusValidator::Validate(static_cast<TBuilder&>(builder).AppendValues(values, size));
+ return true;
}
template <typename T>
bool Append(arrow::ArrayBuilder& builder, const std::vector<typename T::c_type>& values) {
using TBuilder = typename arrow::NumericBuilder<T>;
- auto status = static_cast<TBuilder&>(builder).AppendValues(values.data(), values.size());
- return status.ok();
+ TStatusValidator::Validate(static_cast<TBuilder&>(builder).AppendValues(values.data(), values.size()));
+ return true;
}
template <typename T>
@@ -235,27 +237,29 @@ bool Append(T& builder, const arrow::Array& array, int position, ui64* recordSiz
auto& typedBuilder = static_cast<TBuilder&>(builder);
if (typedArray.IsNull(position)) {
- auto status = typedBuilder.AppendNull();
+ TStatusValidator::Validate(typedBuilder.AppendNull());
if (recordSize) {
*recordSize += 4;
}
- return status.ok();
+ return true;
} else {
if constexpr (!arrow::has_string_view<typename TWrap::T>::value) {
- auto status = typedBuilder.Append(typedArray.GetView(position));
+ TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position)));
if (recordSize) {
*recordSize += sizeof(typedArray.GetView(position));
}
- return status.ok();
+ return true;
}
if constexpr (arrow::has_string_view<typename TWrap::T>::value) {
- auto status = typedBuilder.Append(typedArray.GetView(position));
+ TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position)));
if (recordSize) {
*recordSize += typedArray.GetView(position).size();
}
- return status.ok();
+ return true;
}
}
+ Y_VERIFY(false, "unpredictable variant");
+ return false;
});
}
diff --git a/ydb/core/formats/arrow/switch/ya.make b/ydb/core/formats/arrow/switch/ya.make
index e3736d0b1a8..6865cd04220 100644
--- a/ydb/core/formats/arrow/switch/ya.make
+++ b/ydb/core/formats/arrow/switch/ya.make
@@ -3,6 +3,7 @@ LIBRARY()
PEERDIR(
contrib/libs/apache/arrow
ydb/core/scheme_types
+ library/cpp/actors/core
)
SRCS(