diff options
author | nsofya <nsofya@yandex-team.com> | 2023-08-02 21:32:21 +0300 |
---|---|---|
committer | nsofya <nsofya@yandex-team.com> | 2023-08-02 21:32:21 +0300 |
commit | e51b96599181f0c806cea4bbade9c5ef7c231df7 (patch) | |
tree | 2bc57df304913127c636b9ede56619e254d5f683 | |
parent | e1858b92f55a7b35864002810b6f0b84ff5c6001 (diff) | |
download | ydb-e51b96599181f0c806cea4bbade9c5ef7c231df7.tar.gz |
KIKIMR-18343: Remove BatchCache
Убрала BatchCache по нескольким причинам:
1) Маленький объем 8 МБ (на фоне рейта записи в логи)
2) По факту не работал при SetupIndexation (где он мог потенциально экономить время на парсинге)
3) Вклад в поиск, учитывая объем, незначителен
4) и основное: на текущий момент усложняет внесение разбивки данных на уровень колумншарда, выигрыша после разбиения батчей не будет, их придется парсить повторно в любом случае
5) Если в нем все-таки возникнет необходимость, то лучше его внести в InsertTable
16 files changed, 12 insertions, 51 deletions
diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index 32792aed4c..da1892ee2e 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -256,7 +256,6 @@ ui64 TColumnShard::MemoryUsage() const { TabletCounters->Simple()[COUNTER_PREPARED_RECORDS].Get() * sizeof(NOlap::TInsertedData) + TabletCounters->Simple()[COUNTER_COMMITTED_RECORDS].Get() * sizeof(NOlap::TInsertedData); memory += TablesManager.GetMemoryUsage(); - memory += BatchCache.Bytes(); return memory; } diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp index ece932faeb..c403116e0b 100644 --- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp +++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp @@ -102,11 +102,6 @@ public: TBlobGroupSelector dsGroupSelector(Self->Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - // CacheInserted -> CacheCommitted - for (auto& writeId : meta.WriteIds) { - Self->BatchCache.Commit(writeId); - } - auto pathExists = [&](ui64 pathId) { return Self->TablesManager.HasTable(pathId); }; diff --git a/ydb/core/tx/columnshard/columnshard__read.cpp b/ydb/core/tx/columnshard/columnshard__read.cpp index 466d4db639..a2182f63e4 100644 --- a/ydb/core/tx/columnshard/columnshard__read.cpp +++ b/ydb/core/tx/columnshard/columnshard__read.cpp @@ -92,7 +92,7 @@ bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& ctx) { std::shared_ptr<NOlap::TReadMetadata> metadata; if (parseResult) { - metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), Self->BatchCache, + metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), ErrorDescription, false); } diff --git a/ydb/core/tx/columnshard/columnshard__read_base.cpp b/ydb/core/tx/columnshard/columnshard__read_base.cpp index 6ebc47ddc7..46b3a1ccf4 100644 --- 
a/ydb/core/tx/columnshard/columnshard__read_base.cpp +++ b/ydb/core/tx/columnshard/columnshard__read_base.cpp @@ -9,7 +9,6 @@ std::shared_ptr<NOlap::TReadMetadata> TTxReadBase::PrepareReadMetadata(const NOlap::TReadDescription& read, const std::unique_ptr<NOlap::TInsertTable>& insertTable, const std::unique_ptr<NOlap::IColumnEngine>& index, - const TBatchCache& batchCache, TString& error, const bool isReverse) const { if (!insertTable || !index) { return nullptr; @@ -20,7 +19,7 @@ TTxReadBase::PrepareReadMetadata(const NOlap::TReadDescription& read, return nullptr; } - NOlap::TDataStorageAccessor dataAccessor(insertTable, index, batchCache); + NOlap::TDataStorageAccessor dataAccessor(insertTable, index); auto readMetadata = std::make_shared<NOlap::TReadMetadata>(index->GetVersionedIndex(), read.GetSnapshot(), isReverse ? NOlap::TReadMetadata::ESorting::DESC : NOlap::TReadMetadata::ESorting::ASC, read.GetProgram()); diff --git a/ydb/core/tx/columnshard/columnshard__read_base.h b/ydb/core/tx/columnshard/columnshard__read_base.h index 148971bb22..15347cba87 100644 --- a/ydb/core/tx/columnshard/columnshard__read_base.h +++ b/ydb/core/tx/columnshard/columnshard__read_base.h @@ -15,7 +15,6 @@ protected: const NOlap::TReadDescription& readDescription, const std::unique_ptr<NOlap::TInsertTable>& insertTable, const std::unique_ptr<NOlap::IColumnEngine>& index, - const TBatchCache& batchCache, TString& error, const bool isReverse) const; protected: diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index 465738719f..570cebfb50 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -832,7 +832,7 @@ std::shared_ptr<NOlap::TReadMetadataBase> TTxScan::CreateReadMetadata(NOlap::TRe if (indexStats) { metadata = PrepareStatsReadMetadata(Self->TabletID(), read, Self->TablesManager.GetPrimaryIndex(), ErrorDescription, isReverse); } else { - metadata = 
PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), Self->BatchCache, + metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), ErrorDescription, isReverse); } diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index 9e5c688ca7..84eef1582f 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -96,10 +96,6 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { Y_VERIFY(logoBlobId.BlobSize() == data.size()); NBlobCache::AddRangeToCache(NBlobCache::TBlobRange(logoBlobId, 0, data.size()), data); - // Put new data into batch cache - Y_VERIFY(blobData.GetParsedBatch()); - Self->BatchCache.Insert(TWriteId(writeId), logoBlobId, blobData.GetParsedBatch()); - Self->UpdateInsertTableCounters(); const auto& blobBatch(PutBlobResult->Get()->GetPutResult().GetBlobBatch()); diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index 445a47c382..186fce8b66 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -390,7 +390,6 @@ void TColumnShard::TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTabl if (!RemoveLongTxWrite(db, writeId)) { failedAborts.push_back(writeId); } - BatchCache.EraseInserted(TWriteId(writeId)); } for (auto& writeId : failedAborts) { writesToAbort.erase(writeId); @@ -711,7 +710,6 @@ void TColumnShard::SetupIndexation() { LOG_S_DEBUG("Few data for indexation (" << bytesToIndex << " bytes in " << blobs << " blobs, ignored " << ignored << ") at tablet " << TabletID()); - // Force small indexations sometimes to keep BatchCache smaller if (!bytesToIndex || SkippedIndexations < TSettings::MAX_INDEXATIONS_TO_SKIP) { ++SkippedIndexations; return; @@ -729,10 +727,6 @@ void TColumnShard::SetupIndexation() { data.reserve(dataToIndex.size()); for 
(auto& ptr : dataToIndex) { data.push_back(*ptr); - if (auto inserted = BatchCache.GetInserted(TWriteId(ptr->WriteTxId)); inserted.second) { - Y_VERIFY(ptr->BlobId == inserted.first); - cachedBlobs.emplace(ptr->BlobId, inserted.second); - } } Y_VERIFY(data.size()); diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index db13a0b6ac..42e560fe9f 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -517,7 +517,6 @@ private: TTabletCountersBase* TabletCounters; std::unique_ptr<NTabletPipe::IClientCache> PipeClientCache; std::unique_ptr<NOlap::TInsertTable> InsertTable; - TBatchCache BatchCache; const TScanCounters ReadCounters; const TScanCounters ScanCounters; const TIndexationCounters IndexationCounters = TIndexationCounters("Indexation"); diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index 396376c921..fa62a2478e 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -254,15 +254,13 @@ struct TEvPrivate { class TPutBlobData { YDB_READONLY_DEF(TUnifiedBlobId, BlobId); YDB_READONLY_DEF(TString, BlobData); - YDB_READONLY_DEF(std::shared_ptr<arrow::RecordBatch>, ParsedBatch); YDB_ACCESSOR_DEF(TString, LogicalMeta); public: TPutBlobData() = default; - TPutBlobData(const TUnifiedBlobId& blobId, const TString& data, const std::shared_ptr<arrow::RecordBatch>& batch) + TPutBlobData(const TUnifiedBlobId& blobId, const TString& data) : BlobId(blobId) , BlobData(data) - , ParsedBatch(batch) {} }; diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index df891517b4..1d72dad3bc 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -33,7 +33,6 @@ void 
TInsertColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, for (const auto& cmtd : DataToIndex) { self.InsertTable->EraseCommitted(context.DBWrapper, cmtd); self.BlobManager->DeleteBlob(cmtd.BlobId, *context.BlobManagerDb); - self.BatchCache.EraseCommitted(cmtd.BlobId); } if (!DataToIndex.empty()) { self.UpdateInsertTableCounters(); diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp index 3fd405fc31..5c46f9eefe 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp @@ -10,11 +10,9 @@ namespace NKikimr::NOlap { TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr<NOlap::TInsertTable>& insertTable, - const std::unique_ptr<NOlap::IColumnEngine>& index, - const NColumnShard::TBatchCache& batchCache) + const std::unique_ptr<NOlap::IColumnEngine>& index) : InsertTable(insertTable) , Index(index) - , BatchCache(batchCache) {} std::shared_ptr<NOlap::TSelectInfo> TDataStorageAccessor::Select(const NOlap::TReadDescription& readDescription, const THashSet<ui32>& columnIds) const { @@ -31,10 +29,6 @@ std::vector<NOlap::TCommittedBlob> TDataStorageAccessor::GetCommitedBlobs(const return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot())); } -std::shared_ptr<arrow::RecordBatch> TDataStorageAccessor::GetCachedBatch(const TUnifiedBlobId& blobId) const { - return BatchCache.Get(blobId); -} - std::unique_ptr<NColumnShard::TScanIteratorBase> TReadMetadata::StartScan(const NOlap::TReadContext& readContext) const { return std::make_unique<NColumnShard::TColumnShardScanIterator>(this->shared_from_this(), readContext); } @@ -92,12 +86,7 @@ bool TReadMetadata::Init(const TReadDescription& readDescription, const TDataSto } CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription); - for (auto& cmt : CommittedBlobs) { - if (auto batch = 
dataAccessor.GetCachedBatch(cmt.GetBlobId())) { - CommittedBatches.emplace(cmt.GetBlobId(), batch); - } - } - + THashSet<ui32> columnIds; for (auto& columnId : AllColumns) { columnIds.insert(columnId); diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h index af5debd211..355e033968 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h @@ -58,15 +58,12 @@ class TDataStorageAccessor { private: const std::unique_ptr<NOlap::TInsertTable>& InsertTable; const std::unique_ptr<NOlap::IColumnEngine>& Index; - const NColumnShard::TBatchCache& BatchCache; public: TDataStorageAccessor(const std::unique_ptr<NOlap::TInsertTable>& insertTable, - const std::unique_ptr<NOlap::IColumnEngine>& index, - const NColumnShard::TBatchCache& batchCache); + const std::unique_ptr<NOlap::IColumnEngine>& index); std::shared_ptr<NOlap::TSelectInfo> Select(const NOlap::TReadDescription& readDescription, const THashSet<ui32>& columnIds) const; std::vector<NOlap::TCommittedBlob> GetCommitedBlobs(const NOlap::TReadDescription& readDescription) const; - std::shared_ptr<arrow::RecordBatch> GetCachedBatch(const TUnifiedBlobId& blobId) const; }; // Holds all metadata that is needed to perform read/scan diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp index 20170d772e..164661a4c1 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp @@ -49,7 +49,7 @@ IBlobConstructor::EStatus TIndexedWriteController::TBlobConstructor::BuildNext() bool TIndexedWriteController::TBlobConstructor::RegisterBlobId(const TUnifiedBlobId& blobId) { Y_VERIFY(blobId.BlobSize() == DataPrepared.size()); - Owner.AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData(blobId, 
DataPrepared, Batch)); + Owner.AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData(blobId, DataPrepared), Batch->num_rows(), NArrow::GetBatchDataSize(Batch)); return true; } @@ -72,14 +72,14 @@ void TIndexedWriteController::DoOnReadyResult(const NActors::TActorContext& ctx, } } -void TIndexedWriteController::AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data) { +void TIndexedWriteController::AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data, const ui64 numRows, const ui64 batchSize) { Y_VERIFY(BlobData.empty()); ui64 dirtyTime = AppData()->TimeProvider->Now().Seconds(); Y_VERIFY(dirtyTime); NKikimrTxColumnShard::TLogicalMetadata outMeta; - outMeta.SetNumRows(data.GetParsedBatch()->num_rows()); - outMeta.SetRawBytes(NArrow::GetBatchDataSize(data.GetParsedBatch())); + outMeta.SetNumRows(numRows); + outMeta.SetRawBytes(batchSize); outMeta.SetDirtyWriteTimeSeconds(dirtyTime); TString metaString; diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index 5d21284db8..659420c714 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -46,7 +46,7 @@ public: } public: - void AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data); + void AddBlob(NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData&& data, const ui64 numRows, const ui64 batchSize); }; } diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index b7d01e4b55..59878c0e7d 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -35,8 +35,6 @@ namespace NKikimr::NColumnShard { TBlobGroupSelector dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - owner.BatchCache.Commit(WriteId); - auto pathExists 
= [&](ui64 pathId) { return owner.TablesManager.HasTable(pathId); }; @@ -60,7 +58,6 @@ namespace NKikimr::NColumnShard { void TWriteOperation::Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { Y_VERIFY(Status == EOperationStatus::Prepared); - owner.BatchCache.EraseInserted(WriteId); TBlobGroupSelector dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); |