diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-10-01 10:47:51 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-10-01 11:13:29 +0300 |
commit | 2e588fa7b028551dfe9aa1d43b28eddec9d5534d (patch) | |
tree | 8c4715e3aadf60544d867d2195e89aba324342a6 | |
parent | 11dd57fd9a283200b4512930ce11839b8dbc0aac (diff) | |
download | ydb-2e588fa7b028551dfe9aa1d43b28eddec9d5534d.tar.gz |
KIKIMR-19218: fix huge memory consumption
additional validation
7 files changed, 31 insertions, 17 deletions
diff --git a/ydb/core/formats/arrow/serializer/full.cpp b/ydb/core/formats/arrow/serializer/full.cpp index b41b32a2b22..afaabc151b2 100644 --- a/ydb/core/formats/arrow/serializer/full.cpp +++ b/ydb/core/formats/arrow/serializer/full.cpp @@ -35,7 +35,12 @@ arrow::Result<std::shared_ptr<arrow::RecordBatch>> TFullDataDeserializer::DoDese TString TFullDataSerializer::DoSerialize(const std::shared_ptr<arrow::RecordBatch>& batch) const { TString result; - result.reserve(64u << 10); + { + arrow::io::MockOutputStream mock; + auto writer = TStatusValidator::GetValid(arrow::ipc::MakeStreamWriter(&mock, batch->schema(), Options)); + TStatusValidator::Validate(writer->WriteRecordBatch(*batch)); + result.reserve(mock.GetExtentBytesWritten()); + } { TStringOutputStream stream(&result); auto writer = TStatusValidator::GetValid(arrow::ipc::MakeStreamWriter(&stream, batch->schema(), Options)); diff --git a/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt b/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt index fb4dc6e1cc9..4f9ac4adff3 100644 --- a/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/formats/arrow/switch/CMakeLists.darwin-x86_64.txt @@ -13,6 +13,7 @@ target_link_libraries(formats-arrow-switch PUBLIC yutil libs-apache-arrow ydb-core-scheme_types + cpp-actors-core ) target_sources(formats-arrow-switch PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp diff --git a/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt b/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt index 6e6f464e616..7eed9a1f2e4 100644 --- a/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt +++ b/ydb/core/formats/arrow/switch/CMakeLists.linux-aarch64.txt @@ -14,6 +14,7 @@ target_link_libraries(formats-arrow-switch PUBLIC yutil libs-apache-arrow ydb-core-scheme_types + cpp-actors-core ) target_sources(formats-arrow-switch PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp diff --git a/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt b/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt index 6e6f464e616..7eed9a1f2e4 100644 --- a/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt +++ b/ydb/core/formats/arrow/switch/CMakeLists.linux-x86_64.txt @@ -14,6 +14,7 @@ target_link_libraries(formats-arrow-switch PUBLIC yutil libs-apache-arrow ydb-core-scheme_types + cpp-actors-core ) target_sources(formats-arrow-switch PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp diff --git a/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt b/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt index fb4dc6e1cc9..4f9ac4adff3 100644 --- a/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt +++ b/ydb/core/formats/arrow/switch/CMakeLists.windows-x86_64.txt @@ -13,6 +13,7 @@ target_link_libraries(formats-arrow-switch PUBLIC yutil libs-apache-arrow ydb-core-scheme_types + cpp-actors-core ) target_sources(formats-arrow-switch PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/formats/arrow/switch/switch_type.cpp diff --git a/ydb/core/formats/arrow/switch/switch_type.h b/ydb/core/formats/arrow/switch/switch_type.h index 7e0f98a5b97..05741a62280 100644 --- a/ydb/core/formats/arrow/switch/switch_type.h +++ b/ydb/core/formats/arrow/switch/switch_type.h @@ -1,8 +1,10 @@ #pragma once -#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> -#include <util/system/yassert.h> #include <ydb/core/scheme_types/scheme_type_info.h> #include <ydb/core/scheme/scheme_type_id.h> +#include <ydb/core/formats/arrow/common/validation.h> + +#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h> +#include <util/system/yassert.h> namespace NKikimr::NArrow { @@ -196,32 +198,32 @@ template <typename T> bool Append(arrow::ArrayBuilder& builder, const typename T::c_type& value) { using TBuilder = typename arrow::TypeTraits<T>::BuilderType; - auto status = static_cast<TBuilder&>(builder).Append(value); - return status.ok(); + TStatusValidator::Validate(static_cast<TBuilder&>(builder).Append(value)); + return true; } template <typename T> bool Append(arrow::ArrayBuilder& builder, arrow::util::string_view value) { using TBuilder = typename arrow::TypeTraits<T>::BuilderType; - auto status = static_cast<TBuilder&>(builder).Append(value); - return status.ok(); + TStatusValidator::Validate(static_cast<TBuilder&>(builder).Append(value)); + return true; } template <typename T> bool Append(arrow::ArrayBuilder& builder, const typename T::c_type* values, size_t size) { using TBuilder = typename arrow::NumericBuilder<T>; - auto status = static_cast<TBuilder&>(builder).AppendValues(values, size); - return status.ok(); + TStatusValidator::Validate(static_cast<TBuilder&>(builder).AppendValues(values, size)); + return true; } template <typename T> bool Append(arrow::ArrayBuilder& builder, const std::vector<typename T::c_type>& values) { using TBuilder = typename arrow::NumericBuilder<T>; - auto status = static_cast<TBuilder&>(builder).AppendValues(values.data(), values.size()); - return status.ok(); + TStatusValidator::Validate(static_cast<TBuilder&>(builder).AppendValues(values.data(), values.size())); + return true; } template <typename T> @@ -235,27 +237,29 @@ bool Append(T& builder, const arrow::Array& array, int position, ui64* recordSiz auto& typedBuilder = static_cast<TBuilder&>(builder); if (typedArray.IsNull(position)) { - auto status = typedBuilder.AppendNull(); + TStatusValidator::Validate(typedBuilder.AppendNull()); if (recordSize) { *recordSize += 4; } - return status.ok(); + return true; } else { if constexpr (!arrow::has_string_view<typename TWrap::T>::value) { - auto status = typedBuilder.Append(typedArray.GetView(position)); + TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position))); if (recordSize) { *recordSize += sizeof(typedArray.GetView(position)); } - return status.ok(); + return true; } if constexpr (arrow::has_string_view<typename TWrap::T>::value) { - auto status = typedBuilder.Append(typedArray.GetView(position)); + TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position))); if (recordSize) { *recordSize += typedArray.GetView(position).size(); } - return status.ok(); + return true; } } + Y_VERIFY(false, "unpredictable variant"); + return false; }); } diff --git a/ydb/core/formats/arrow/switch/ya.make b/ydb/core/formats/arrow/switch/ya.make index e3736d0b1a8..6865cd04220 100644 --- a/ydb/core/formats/arrow/switch/ya.make +++ b/ydb/core/formats/arrow/switch/ya.make @@ -3,6 +3,7 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/scheme_types + library/cpp/actors/core ) SRCS( |