diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-08-11 10:35:37 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-08-11 11:19:46 +0300 |
commit | 24346716eec298b8e568074e63f01e89b304aaae (patch) | |
tree | 734be65f4036fc7f22398be8b7e14cf1a64ff115 | |
parent | fd7c47251710d4b4ada857f26ac35460e65c3422 (diff) | |
download | ydb-24346716eec298b8e568074e63f01e89b304aaae.tar.gz |
KIKIMR-19019: temporary fix before remove splitting into CS
-rw-r--r-- | ydb/core/formats/arrow/size_calcer.cpp | 5 | ||||
-rw-r--r-- | ydb/core/formats/arrow/size_calcer.h | 2 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp | 5 | ||||
-rw-r--r-- | ydb/core/tx/ev_write/columnshard_splitter.h | 4 |
4 files changed, 14 insertions, 2 deletions
diff --git a/ydb/core/formats/arrow/size_calcer.cpp b/ydb/core/formats/arrow/size_calcer.cpp index 9f9e98fc72..56a8ee7c29 100644 --- a/ydb/core/formats/arrow/size_calcer.cpp +++ b/ydb/core/formats/arrow/size_calcer.cpp @@ -4,6 +4,7 @@ #include "dictionary/conversion.h" #include <contrib/libs/apache/arrow/cpp/src/arrow/type.h> #include <util/system/yassert.h> +#include <util/string/builder.h> namespace NKikimr::NArrow { @@ -261,4 +262,8 @@ bool TSerializedBatch::BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, return true; } +TString TSerializedBatch::DebugString() const { + return TStringBuilder() << "(data_size=" << Data.size() << ";schema_data_size=" << SchemaData.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)"; +} + } diff --git a/ydb/core/formats/arrow/size_calcer.h b/ydb/core/formats/arrow/size_calcer.h index f17566c85c..a7ca8095bf 100644 --- a/ydb/core/formats/arrow/size_calcer.h +++ b/ydb/core/formats/arrow/size_calcer.h @@ -56,6 +56,8 @@ public: return Data.size(); } + TString DebugString() const; + static bool BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, const ui32 sizeLimit, std::vector<TSerializedBatch>& result, TString* errorMessage); static bool BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, const ui32 sizeLimit, std::optional<TSerializedBatch>& sbL, std::optional<TSerializedBatch>& sbR, TString* errorMessage); static TSerializedBatch Build(std::shared_ptr<arrow::RecordBatch> batch); diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp index 0ab5872d39..cf1037451f 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp @@ -51,6 +51,11 @@ bool TIndexedWriteController::TBlobConstructor::Init() { return false; } BlobsSplitted = splitResult.ReleaseResult(); + if (BlobsSplitted.size() > 1) { + for (auto&& i : BlobsSplitted) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "strange_blobs_splitting")("blob", i.DebugString())("original_size", Owner.WriteData.GetSize()); + } + } return true; } diff --git a/ydb/core/tx/ev_write/columnshard_splitter.h b/ydb/core/tx/ev_write/columnshard_splitter.h index 4d6026e71d..6e41a6b2ec 100644 --- a/ydb/core/tx/ev_write/columnshard_splitter.h +++ b/ydb/core/tx/ev_write/columnshard_splitter.h @@ -115,7 +115,7 @@ private: TFullSplitData result(numShards); if (numShards == 1) { - NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(batch, NColumnShard::TLimits::GetMaxBlobSize()); + NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(batch, NColumnShard::TLimits::GetMaxBlobSize() - 1024 * 1024); if (!blobsSplitted) { return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, "cannot split batch in according to limits: " + blobsSplitted.GetErrorMessage()); } @@ -143,7 +143,7 @@ private: THashMap<ui64, TString> out; for (size_t i = 0; i < sharded.size(); ++i) { if (sharded[i]) { - NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(sharded[i], NColumnShard::TLimits::GetMaxBlobSize()); + NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(sharded[i], NColumnShard::TLimits::GetMaxBlobSize() - 1024 * 1024); if (!blobsSplitted) { return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, "cannot split batch in according to limits: " + blobsSplitted.GetErrorMessage()); } |