aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornsofya <nsofya@yandex-team.com>2023-08-02 21:32:21 +0300
committernsofya <nsofya@yandex-team.com>2023-08-02 21:32:21 +0300
commite51b96599181f0c806cea4bbade9c5ef7c231df7 (patch)
tree2bc57df304913127c636b9ede56619e254d5f683
parente1858b92f55a7b35864002810b6f0b84ff5c6001 (diff)
downloadydb-e51b96599181f0c806cea4bbade9c5ef7c231df7.tar.gz
KIKIMR-18343: Remove BatchCache
Убрала BatchCache по нескольким причинам: 1) Маленький объем 8 мб (на фоне рейса записи в логи) 2) По факту не работал при SetupIndexation (где он мог потенциально экономить время на парсинге) 3) Вклад в поиск, учитывая объем, незначителен 4) и основное: на текущий момент усложняет внесение разбивки данных на уровень колумншарда, выигрыша после разбиения битве не будет, их придется парсить повторно в любом случае 5) Если в нем все-таки возникнет необходимость, то лучше его внести в InsertTable
-rw-r--r--ydb/core/tx/columnshard/columnshard.cpp1
-rw-r--r--ydb/core/tx/columnshard/columnshard__progress_tx.cpp5
-rw-r--r--ydb/core/tx/columnshard/columnshard__read.cpp2
-rw-r--r--ydb/core/tx/columnshard/columnshard__read_base.cpp3
-rw-r--r--ydb/core/tx/columnshard/columnshard__read_base.h1
-rw-r--r--ydb/core/tx/columnshard/columnshard__scan.cpp2
-rw-r--r--ydb/core/tx/columnshard/columnshard__write.cpp4
-rw-r--r--ydb/core/tx/columnshard/columnshard_impl.cpp6
-rw-r--r--ydb/core/tx/columnshard/columnshard_impl.h1
-rw-r--r--ydb/core/tx/columnshard/columnshard_private_events.h4
-rw-r--r--ydb/core/tx/columnshard/engines/changes/indexation.cpp1
-rw-r--r--ydb/core/tx/columnshard/engines/reader/read_metadata.cpp15
-rw-r--r--ydb/core/tx/columnshard/engines/reader/read_metadata.h5
-rw-r--r--ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp8
-rw-r--r--ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h2
-rw-r--r--ydb/core/tx/columnshard/operations/write.cpp3
16 files changed, 12 insertions, 51 deletions
diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp
index 32792aed4c..da1892ee2e 100644
--- a/ydb/core/tx/columnshard/columnshard.cpp
+++ b/ydb/core/tx/columnshard/columnshard.cpp
@@ -256,7 +256,6 @@ ui64 TColumnShard::MemoryUsage() const {
TabletCounters->Simple()[COUNTER_PREPARED_RECORDS].Get() * sizeof(NOlap::TInsertedData) +
TabletCounters->Simple()[COUNTER_COMMITTED_RECORDS].Get() * sizeof(NOlap::TInsertedData);
memory += TablesManager.GetMemoryUsage();
- memory += BatchCache.Bytes();
return memory;
}
diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp
index ece932faeb..c403116e0b 100644
--- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp
+++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp
@@ -102,11 +102,6 @@ public:
TBlobGroupSelector dsGroupSelector(Self->Info());
NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector);
- // CacheInserted -> CacheCommitted
- for (auto& writeId : meta.WriteIds) {
- Self->BatchCache.Commit(writeId);
- }
-
auto pathExists = [&](ui64 pathId) {
return Self->TablesManager.HasTable(pathId);
};
diff --git a/ydb/core/tx/columnshard/columnshard__read.cpp b/ydb/core/tx/columnshard/columnshard__read.cpp
index 466d4db639..a2182f63e4 100644
--- a/ydb/core/tx/columnshard/columnshard__read.cpp
+++ b/ydb/core/tx/columnshard/columnshard__read.cpp
@@ -92,7 +92,7 @@ bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& ctx) {
std::shared_ptr<NOlap::TReadMetadata> metadata;
if (parseResult) {
- metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), Self->BatchCache,
+ metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(),
ErrorDescription, false);
}
diff --git a/ydb/core/tx/columnshard/columnshard__read_base.cpp b/ydb/core/tx/columnshard/columnshard__read_base.cpp
index 6ebc47ddc7..46b3a1ccf4 100644
--- a/ydb/core/tx/columnshard/columnshard__read_base.cpp
+++ b/ydb/core/tx/columnshard/columnshard__read_base.cpp
@@ -9,7 +9,6 @@ std::shared_ptr<NOlap::TReadMetadata>
TTxReadBase::PrepareReadMetadata(const NOlap::TReadDescription& read,
const std::unique_ptr<NOlap::TInsertTable>& insertTable,
const std::unique_ptr<NOlap::IColumnEngine>& index,
- const TBatchCache& batchCache,
TString& error, const bool isReverse) const {
if (!insertTable || !index) {
return nullptr;
@@ -20,7 +19,7 @@ TTxReadBase::PrepareReadMetadata(const NOlap::TReadDescription& read,
return nullptr;
}
- NOlap::TDataStorageAccessor dataAccessor(insertTable, index, batchCache);
+ NOlap::TDataStorageAccessor dataAccessor(insertTable, index);
auto readMetadata = std::make_shared<NOlap::TReadMetadata>(index->GetVersionedIndex(), read.GetSnapshot(),
isReverse ? NOlap::TReadMetadata::ESorting::DESC : NOlap::TReadMetadata::ESorting::ASC, read.GetProgram());
diff --git a/ydb/core/tx/columnshard/columnshard__read_base.h b/ydb/core/tx/columnshard/columnshard__read_base.h
index 148971bb22..15347cba87 100644
--- a/ydb/core/tx/columnshard/columnshard__read_base.h
+++ b/ydb/core/tx/columnshard/columnshard__read_base.h
@@ -15,7 +15,6 @@ protected:
const NOlap::TReadDescription& readDescription,
const std::unique_ptr<NOlap::TInsertTable>& insertTable,
const std::unique_ptr<NOlap::IColumnEngine>& index,
- const TBatchCache& batchCache,
TString& error, const bool isReverse) const;
protected:
diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp
index 465738719f..570cebfb50 100644
--- a/ydb/core/tx/columnshard/columnshard__scan.cpp
+++ b/ydb/core/tx/columnshard/columnshard__scan.cpp
@@ -832,7 +832,7 @@ std::shared_ptr<NOlap::TReadMetadataBase> TTxScan::CreateReadMetadata(NOlap::TRe
if (indexStats) {
metadata = PrepareStatsReadMetadata(Self->TabletID(), read, Self->TablesManager.GetPrimaryIndex(), ErrorDescription, isReverse);
} else {
- metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), Self->BatchCache,
+ metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(),
ErrorDescription, isReverse);
}
diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp
index 9e5c688ca7..84eef1582f 100644
--- a/ydb/core/tx/columnshard/columnshard__write.cpp
+++ b/ydb/core/tx/columnshard/columnshard__write.cpp
@@ -96,10 +96,6 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) {
Y_VERIFY(logoBlobId.BlobSize() == data.size());
NBlobCache::AddRangeToCache(NBlobCache::TBlobRange(logoBlobId, 0, data.size()), data);
- // Put new data into batch cache
- Y_VERIFY(blobData.GetParsedBatch());
- Self->BatchCache.Insert(TWriteId(writeId), logoBlobId, blobData.GetParsedBatch());
-
Self->UpdateInsertTableCounters();
const auto& blobBatch(PutBlobResult->Get()->GetPutResult().GetBlobBatch());
diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp
index 445a47c382..186fce8b66 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.cpp
+++ b/ydb/core/tx/columnshard/columnshard_impl.cpp
@@ -390,7 +390,6 @@ void TColumnShard::TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTabl
if (!RemoveLongTxWrite(db, writeId)) {
failedAborts.push_back(writeId);
}
- BatchCache.EraseInserted(TWriteId(writeId));
}
for (auto& writeId : failedAborts) {
writesToAbort.erase(writeId);
@@ -711,7 +710,6 @@ void TColumnShard::SetupIndexation() {
LOG_S_DEBUG("Few data for indexation (" << bytesToIndex << " bytes in " << blobs << " blobs, ignored "
<< ignored << ") at tablet " << TabletID());
- // Force small indexations sometimes to keep BatchCache smaller
if (!bytesToIndex || SkippedIndexations < TSettings::MAX_INDEXATIONS_TO_SKIP) {
++SkippedIndexations;
return;
@@ -729,10 +727,6 @@ void TColumnShard::SetupIndexation() {
data.reserve(dataToIndex.size());
for (auto& ptr : dataToIndex) {
data.push_back(*ptr);
- if (auto inserted = BatchCache.GetInserted(TWriteId(ptr->WriteTxId)); inserted.second) {
- Y_VERIFY(ptr->BlobId == inserted.first);
- cachedBlobs.emplace(ptr->BlobId, inserted.second);
- }
}
Y_VERIFY(data.size());
diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h
index db13a0b6ac..42e560fe9f 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.h
+++ b/ydb/core/tx/columnshard/columnshard_impl.h
@@ -517,7 +517,6 @@ private:
TTabletCountersBase* TabletCounters;
std::unique_ptr<NTabletPipe::IClientCache> PipeClientCache;
std::unique_ptr<NOlap::TInsertTable> InsertTable;
- TBatchCache BatchCache;
const TScanCounters ReadCounters;
const TScanCounters ScanCounters;
const TIndexationCounters IndexationCounters = TIndexationCounters("Indexation");
diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h
index 396376c921..fa62a2478e 100644
--- a/ydb/core/tx/columnshard/columnshard_private_events.h
+++ b/ydb/core/tx/columnshard/columnshard_private_events.h
@@ -254,15 +254,13 @@ struct TEvPrivate {
class TPutBlobData {
YDB_READONLY_DEF(TUnifiedBlobId, BlobId);
YDB_READONLY_DEF(TString, BlobData);
- YDB_READONLY_DEF(std::shared_ptr<arrow::RecordBatch>, ParsedBatch);
YDB_ACCESSOR_DEF(TString, LogicalMeta);
public:
TPutBlobData() = default;
- TPutBlobData(const TUnifiedBlobId& blobId, const TString& data, const std::shared_ptr<arrow::RecordBatch>& batch)
+ TPutBlobData(const TUnifiedBlobId& blobId, const TString& data)
: BlobId(blobId)
, BlobData(data)
- , ParsedBatch(batch)
{}
};
diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp
index df891517b4..1d72dad3bc 100644
--- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp
+++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp
@@ -33,7 +33,6 @@ void TInsertColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self,
for (const auto& cmtd : DataToIndex) {
self.InsertTable->EraseCommitted(context.DBWrapper, cmtd);
self.BlobManager->DeleteBlob(cmtd.BlobId, *context.BlobManagerDb);
- self.BatchCache.EraseCommitted(cmtd.BlobId);
}
if (!DataToIndex.empty()) {
self.UpdateInsertTableCounters();
diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp
index 3fd405fc31..5c46f9eefe 100644
--- a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp
+++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp
@@ -10,11 +10,9 @@
namespace NKikimr::NOlap {
TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr<NOlap::TInsertTable>& insertTable,
- const std::unique_ptr<NOlap::IColumnEngine>& index,
- const NColumnShard::TBatchCache& batchCache)
+ const std::unique_ptr<NOlap::IColumnEngine>& index)
: InsertTable(insertTable)
, Index(index)
- , BatchCache(batchCache)
{}
std::shared_ptr<NOlap::TSelectInfo> TDataStorageAccessor::Select(const NOlap::TReadDescription& readDescription, const THashSet<ui32>& columnIds) const {
@@ -31,10 +29,6 @@ std::vector<NOlap::TCommittedBlob> TDataStorageAccessor::GetCommitedBlobs(const
return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot()));
}
-std::shared_ptr<arrow::RecordBatch> TDataStorageAccessor::GetCachedBatch(const TUnifiedBlobId& blobId) const {
- return BatchCache.Get(blobId);
-}
-
std::unique_ptr<NColumnShard::TScanIteratorBase> TReadMetadata::StartScan(const NOlap::TReadContext& readContext) const {
return std::make_unique<NColumnShard::TColumnShardScanIterator>(this->shared_from_this(), readContext);
}
@@ -92,12 +86,7 @@ bool TReadMetadata::Init(const TReadDescription& readDescription, const TDataSto
}
CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription);
- for (auto& cmt : CommittedBlobs) {
- if (auto batch = dataAccessor.GetCachedBatch(cmt.GetBlobId())) {
- CommittedBatches.emplace(cmt.GetBlobId(), batch);
- }
- }
-
+
THashSet<ui32> columnIds;
for (auto& columnId : AllColumns) {
columnIds.insert(columnId);
diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h
index af5debd211..355e033968 100644
--- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h
+++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h
@@ -58,15 +58,12 @@ class TDataStorageAccessor {
private:
const std::unique_ptr<NOlap::TInsertTable>& InsertTable;
const std::unique_ptr<NOlap::IColumnEngine>& Index;
- const NColumnShard::TBatchCache& BatchCache;
public:
TDataStorageAccessor(const std::unique_ptr<NOlap::TInsertTable>& insertTable,
- const std::unique_ptr<NOlap::IColumnEngine>& index,
- const NColumnShard::TBatchCache& batchCache);
+ const std::unique_ptr<NOlap::IColumnEngine>& index);
std::shared_ptr<NOlap::TSelectInfo> Select(const NOlap::TReadDescription& readDescription, const THashSet<ui32>& columnIds) const;
std::vector<NOlap::TCommittedBlob> GetCommitedBlobs(const NOlap::TReadDescription& readDescription) const;
- std::shared_ptr<arrow::RecordBatch> GetCachedBatch(const TUnifiedBlobId& blobId) const;
};
// Holds all metadata that is needed to perform read/scan
diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp
index 20170d772e..164661a4c1 100644
--- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp
+++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp
@@ -49,7 +49,7 @@ IBlobConstructor::EStatus TIndexedWriteController::TBlobConstructor::BuildNext()
bool TIndexedWriteController::TBlobConstructor::RegisterBlobId(const TUnifiedBlobId& blobId) {
Y_VERIFY(blobId.BlobSize() == DataPrepared.size());
- Owner.AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData(blobId, DataPrepared, Batch));
+ Owner.AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData(blobId, DataPrepared), Batch->num_rows(), NArrow::GetBatchDataSize(Batch));
return true;
}
@@ -72,14 +72,14 @@ void TIndexedWriteController::DoOnReadyResult(const NActors::TActorContext& ctx,
}
}
-void TIndexedWriteController::AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data) {
+void TIndexedWriteController::AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data, const ui64 numRows, const ui64 batchSize) {
Y_VERIFY(BlobData.empty());
ui64 dirtyTime = AppData()->TimeProvider->Now().Seconds();
Y_VERIFY(dirtyTime);
NKikimrTxColumnShard::TLogicalMetadata outMeta;
- outMeta.SetNumRows(data.GetParsedBatch()->num_rows());
- outMeta.SetRawBytes(NArrow::GetBatchDataSize(data.GetParsedBatch()));
+ outMeta.SetNumRows(numRows);
+ outMeta.SetRawBytes(batchSize);
outMeta.SetDirtyWriteTimeSeconds(dirtyTime);
TString metaString;
diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h
index 5d21284db8..659420c714 100644
--- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h
+++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h
@@ -46,7 +46,7 @@ public:
}
public:
- void AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data);
+ void AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data, const ui64 numRows, const ui64 batchSize);
};
}
diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp
index b7d01e4b55..59878c0e7d 100644
--- a/ydb/core/tx/columnshard/operations/write.cpp
+++ b/ydb/core/tx/columnshard/operations/write.cpp
@@ -35,8 +35,6 @@ namespace NKikimr::NColumnShard {
TBlobGroupSelector dsGroupSelector(owner.Info());
NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector);
- owner.BatchCache.Commit(WriteId);
-
auto pathExists = [&](ui64 pathId) {
return owner.TablesManager.HasTable(pathId);
};
@@ -60,7 +58,6 @@ namespace NKikimr::NColumnShard {
void TWriteOperation::Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const {
Y_VERIFY(Status == EOperationStatus::Prepared);
- owner.BatchCache.EraseInserted(WriteId);
TBlobGroupSelector dsGroupSelector(owner.Info());
NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector);