aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorivanmorozov <ivanmorozov@yandex-team.com>2023-08-11 10:35:37 +0300
committerivanmorozov <ivanmorozov@yandex-team.com>2023-08-11 11:19:46 +0300
commit24346716eec298b8e568074e63f01e89b304aaae (patch)
tree734be65f4036fc7f22398be8b7e14cf1a64ff115
parentfd7c47251710d4b4ada857f26ac35460e65c3422 (diff)
downloadydb-24346716eec298b8e568074e63f01e89b304aaae.tar.gz
KIKIMR-19019: temporary fix before remove splitting into CS
-rw-r--r--ydb/core/formats/arrow/size_calcer.cpp5
-rw-r--r--ydb/core/formats/arrow/size_calcer.h2
-rw-r--r--ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp5
-rw-r--r--ydb/core/tx/ev_write/columnshard_splitter.h4
4 files changed, 14 insertions, 2 deletions
diff --git a/ydb/core/formats/arrow/size_calcer.cpp b/ydb/core/formats/arrow/size_calcer.cpp
index 9f9e98fc72..56a8ee7c29 100644
--- a/ydb/core/formats/arrow/size_calcer.cpp
+++ b/ydb/core/formats/arrow/size_calcer.cpp
@@ -4,6 +4,7 @@
#include "dictionary/conversion.h"
#include <contrib/libs/apache/arrow/cpp/src/arrow/type.h>
#include <util/system/yassert.h>
+#include <util/string/builder.h>
namespace NKikimr::NArrow {
@@ -261,4 +262,8 @@ bool TSerializedBatch::BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch,
return true;
}
+TString TSerializedBatch::DebugString() const {
+ return TStringBuilder() << "(data_size=" << Data.size() << ";schema_data_size=" << SchemaData.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)";
+}
+
}
diff --git a/ydb/core/formats/arrow/size_calcer.h b/ydb/core/formats/arrow/size_calcer.h
index f17566c85c..a7ca8095bf 100644
--- a/ydb/core/formats/arrow/size_calcer.h
+++ b/ydb/core/formats/arrow/size_calcer.h
@@ -56,6 +56,8 @@ public:
return Data.size();
}
+ TString DebugString() const;
+
static bool BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, const ui32 sizeLimit, std::vector<TSerializedBatch>& result, TString* errorMessage);
static bool BuildWithLimit(std::shared_ptr<arrow::RecordBatch> batch, const ui32 sizeLimit, std::optional<TSerializedBatch>& sbL, std::optional<TSerializedBatch>& sbR, TString* errorMessage);
static TSerializedBatch Build(std::shared_ptr<arrow::RecordBatch> batch);
diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp
index 0ab5872d39..cf1037451f 100644
--- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp
+++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp
@@ -51,6 +51,11 @@ bool TIndexedWriteController::TBlobConstructor::Init() {
return false;
}
BlobsSplitted = splitResult.ReleaseResult();
+ if (BlobsSplitted.size() > 1) {
+ for (auto&& i : BlobsSplitted) {
+ AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "strange_blobs_splitting")("blob", i.DebugString())("original_size", Owner.WriteData.GetSize());
+ }
+ }
return true;
}
diff --git a/ydb/core/tx/ev_write/columnshard_splitter.h b/ydb/core/tx/ev_write/columnshard_splitter.h
index 4d6026e71d..6e41a6b2ec 100644
--- a/ydb/core/tx/ev_write/columnshard_splitter.h
+++ b/ydb/core/tx/ev_write/columnshard_splitter.h
@@ -115,7 +115,7 @@ private:
TFullSplitData result(numShards);
if (numShards == 1) {
- NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(batch, NColumnShard::TLimits::GetMaxBlobSize());
+ NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(batch, NColumnShard::TLimits::GetMaxBlobSize() - 1024 * 1024);
if (!blobsSplitted) {
return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, "cannot split batch in according to limits: " + blobsSplitted.GetErrorMessage());
}
@@ -143,7 +143,7 @@ private:
THashMap<ui64, TString> out;
for (size_t i = 0; i < sharded.size(); ++i) {
if (sharded[i]) {
- NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(sharded[i], NColumnShard::TLimits::GetMaxBlobSize());
+ NArrow::TSplitBlobResult blobsSplitted = NArrow::SplitByBlobSize(sharded[i], NColumnShard::TLimits::GetMaxBlobSize() - 1024 * 1024);
if (!blobsSplitted) {
return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, "cannot split batch in according to limits: " + blobsSplitted.GetErrorMessage());
}