diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-09-21 17:24:45 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-09-21 17:54:06 +0300 |
commit | 44a989845eb33898188bdb65e86926bf43ce083e (patch) | |
tree | 09fa123f7260ea24f0f607f0ac263ef4cb20dff3 | |
parent | 46510fe73a29904c64c8b51f5d182d4b160b68e9 (diff) | |
download | ydb-44a989845eb33898188bdb65e86926bf43ce083e.tar.gz |
KIKIMR-19215: different storage operators for column shards
235 files changed, 4561 insertions, 4206 deletions
diff --git a/ydb/core/protos/counters_columnshard.proto b/ydb/core/protos/counters_columnshard.proto index 4bb74f05b8f..428621d0f20 100644 --- a/ydb/core/protos/counters_columnshard.proto +++ b/ydb/core/protos/counters_columnshard.proto @@ -175,4 +175,7 @@ enum ETxTypes { TXTYPE_EXPORT = 13 [(TxTypeOpts) = {Name: "TxExport"}]; TXTYPE_FORGET = 14 [(TxTypeOpts) = {Name: "TxForget"}]; TXTYPE_WRITE_DRAFT = 15 [(TxTypeOpts) = {Name: "TxWriteDraft"}]; + TXTYPE_CLEANUP_INSERT_TABLE = 16 [(TxTypeOpts) = {Name: "TxInsertTableCleanup"}]; + TXTYPE_GC_FINISHED = 17 [(TxTypeOpts) = {Name: "TxGarbageCollectionFinished"}]; + } diff --git a/ydb/core/tx/columnshard/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/CMakeLists.darwin-x86_64.txt index 46da987a57b..d3e1e8a7d0e 100644 --- a/ydb/core/tx/columnshard/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/CMakeLists.darwin-x86_64.txt @@ -68,9 +68,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_cache.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager_txs.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__export.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__forget.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__init.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -79,7 +76,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_base.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__index_scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__stats_scan.cpp diff --git a/ydb/core/tx/columnshard/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/CMakeLists.linux-aarch64.txt index 4cf6358090b..53148378773 100644 --- a/ydb/core/tx/columnshard/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/CMakeLists.linux-aarch64.txt @@ -69,9 +69,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_cache.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager_txs.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__export.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__forget.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__init.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -80,7 +77,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_base.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__index_scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__stats_scan.cpp diff --git a/ydb/core/tx/columnshard/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/CMakeLists.linux-x86_64.txt index 4cf6358090b..53148378773 100644 --- a/ydb/core/tx/columnshard/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/CMakeLists.linux-x86_64.txt @@ -69,9 +69,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_cache.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager_txs.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__export.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__forget.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__init.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -80,7 +77,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_base.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__index_scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__stats_scan.cpp diff --git a/ydb/core/tx/columnshard/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/CMakeLists.windows-x86_64.txt index 46da987a57b..d3e1e8a7d0e 100644 --- a/ydb/core/tx/columnshard/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/CMakeLists.windows-x86_64.txt @@ -68,9 +68,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_cache.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blob_manager_txs.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__export.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__forget.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__init.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -79,7 +76,6 @@ target_sources(core-tx-columnshard PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_base.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__index_scan.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/columnshard__stats_scan.cpp diff --git a/ydb/core/tx/columnshard/blob.h b/ydb/core/tx/columnshard/blob.h index f92e1568780..92cfd009060 100644 --- a/ydb/core/tx/columnshard/blob.h +++ b/ydb/core/tx/columnshard/blob.h @@ -307,6 +307,10 @@ struct TBlobRange { } } + static TBlobRange FromBlobId(const TUnifiedBlobId& blobId) { + return TBlobRange(blobId, 0, blobId.BlobSize()); + } + bool operator == (const TBlobRange& other) const { return BlobId == other.BlobId && @@ -328,16 +332,20 @@ struct TBlobRange { }; class IBlobInUseTracker { -protected: - ~IBlobInUseTracker() = default; - +private: + virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& blobId) = 0; + virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& blobId) = 0; public: - // Marks the blob as "in use (or no longer in use) by an in-flight request", increments (or decrements) - // it's ref count. This will prevent the blob from beeing physically deleted when DeleteBlob() is called - // until all the references are released. - // NOTE: this ref counts are in-memory only, so the blobs can be deleted if tablet restarts - virtual bool SetBlobInUse(const NOlap::TUnifiedBlobId& blobId, bool inUse) = 0; - virtual bool BlobInUse(const NOlap::TUnifiedBlobId& blobId) const = 0; + virtual ~IBlobInUseTracker() = default; + + bool FreeBlob(const NOlap::TUnifiedBlobId& blobId) { + return DoFreeBlob(blobId); + } + bool UseBlob(const NOlap::TUnifiedBlobId& blobId) { + return DoUseBlob(blobId); + } + + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const = 0; }; // Expected blob lifecycle: EVICTING -> SELF_CACHED -> EXTERN <-> CACHED diff --git a/ydb/core/tx/columnshard/blob_cache.cpp b/ydb/core/tx/columnshard/blob_cache.cpp index f3df5daa96f..6a87792851a 100644 --- a/ydb/core/tx/columnshard/blob_cache.cpp +++ b/ydb/core/tx/columnshard/blob_cache.cpp @@ -212,7 +212,6 @@ private: HFunc(TEvBlobStorage::TEvGetResult, Handle); HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvColumnShard::TEvReadBlobRangesResult, Handle); default: LOG_S_WARN("Unhandled event type: " << ev->GetTypeRewrite() << " event: " << ev->ToString()); @@ -435,15 +434,11 @@ private: // We might need to free some space to accommodate the results of new reads Evict(ctx); - std::vector<ui64> tabletReads; - tabletReads.reserve(groupedBlobRanges.size() + fallbackRanges.size()); - for (auto& [blobId, ranges] : fallbackRanges) { Y_VERIFY(blobId.IsDsBlob()); ui64 cookie = ++ReadCookie; CookieToRange[cookie] = std::move(ranges); - tabletReads.push_back(cookie); } ui64 cookie = ++ReadCookie; @@ -453,17 +448,13 @@ private: ui64 requestSize = 0; ui32 dsGroup = std::get<1>(target); TReadItem::EReadVariant readVariant = std::get<2>(target); - bool isDS = rangesGroup.begin()->BlobId.IsDsBlob(); + Y_VERIFY(rangesGroup.begin()->BlobId.IsDsBlob()); std::vector<ui64> dsReads; for (auto& blobRange : rangesGroup) { if (requestSize && (requestSize + blobRange.Size > MAX_REQUEST_BYTES)) { - if (isDS) { - dsReads.push_back(cookie); - } else { - tabletReads.push_back(cookie); - } + dsReads.push_back(cookie); cookie = ++ReadCookie; requestSize = 0; } @@ -472,11 +463,7 @@ private: CookieToRange[cookie].emplace_back(std::move(blobRange)); } if (requestSize) { - if (isDS) { - dsReads.push_back(cookie); - } else { - tabletReads.push_back(cookie); - } + dsReads.push_back(cookie); cookie = ++ReadCookie; requestSize = 0; } @@ -485,10 +472,6 @@ private: SendBatchReadRequestToDS(CookieToRange[cookie], cookie, dsGroup, readVariant, ctx); } } - - for (ui64 cookie : tabletReads) { - SendBatchReadRequestToTablet(CookieToRange[cookie], cookie, ctx); - } } void SendResult(const TActorId& to, const TBlobRange& blobRange, NKikimrProto::EReplyStatus status, @@ -577,35 +560,6 @@ private: OutstandingReads.erase(readIt); } - void SendBatchReadRequestToTablet(const std::vector<TBlobRange>& blobRanges, - const ui64 cookie, const TActorContext& ctx) - { - Y_VERIFY(!blobRanges.empty()); - ui64 tabletId = blobRanges.front().BlobId.GetTabletId(); - - LOG_S_INFO("Sending read from Tablet: " << tabletId - << " ranges: " << JoinStrings(blobRanges.begin(), blobRanges.end(), " ") - << " cookie: " << cookie); - - if (!ShardPipes.contains(tabletId)) { - NTabletPipe::TClientConfig clientConfig; - clientConfig.AllowFollower = false; - clientConfig.CheckAliveness = true; - clientConfig.RetryPolicy = { - .RetryLimitCount = 10, - .MinRetryTime = TDuration::MilliSeconds(5), - }; - ShardPipes[tabletId] = ctx.Register(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig)); - } - - auto ev = std::make_unique<TEvColumnShard::TEvReadBlobRanges>(blobRanges); - - InFlightTabletRequests[tabletId].insert(cookie); - NTabletPipe::SendData(ctx, ShardPipes[tabletId], ev.release(), cookie); - - ReadRequests->Inc(); - } - // Frogets the pipe to the tablet and fails all in-flight requests to it void DestroyPipe(ui64 tabletId, const TActorContext& ctx) { ShardPipes.erase(tabletId); @@ -652,64 +606,6 @@ private: DestroyPipe(tabletId, ctx); } - void Handle(TEvColumnShard::TEvReadBlobRangesResult::TPtr& ev, const TActorContext& ctx) { - const auto& record = ev->Get()->Record; - ui64 tabletId = record.GetTabletId(); - ui64 readCookie = ev->Cookie; - LOG_S_INFO("Got read result from tablet: " << tabletId); - - auto cookieIt = CookieToRange.find(readCookie); - if (cookieIt == CookieToRange.end()) { - // This might only happen in case fo race between response and pipe close - LOG_S_NOTICE("Unknown read result cookie: " << readCookie); - return; - } - - std::vector<TBlobRange> blobRanges = std::move(cookieIt->second); - - Y_VERIFY(record.ResultsSize(), "Zero results for read request!"); - Y_VERIFY(blobRanges.size() >= record.ResultsSize(), "Mismatched number of results for read request"); - - if (blobRanges.size() == record.ResultsSize()) { - InFlightTabletRequests[tabletId].erase(readCookie); - CookieToRange.erase(readCookie); - } else { - // Extract blobRanges for returned blobId. Keep others ordered. - TString strReturnedBlobId = record.GetResults(0).GetBlobRange().GetBlobId(); - std::vector<TBlobRange> same; - std::vector<TBlobRange> others; - same.reserve(record.ResultsSize()); - others.reserve(blobRanges.size() - record.ResultsSize()); - - for (auto&& blobRange : blobRanges) { - TString strBlobId = blobRange.BlobId.ToStringNew(); - if (strBlobId == strReturnedBlobId) { - same.emplace_back(std::move(blobRange)); - } else { - others.emplace_back(std::move(blobRange)); - } - } - blobRanges.swap(same); - - CookieToRange[readCookie] = std::move(others); - } - - for (size_t i = 0; i < record.ResultsSize(); ++i) { - const auto& res = record.GetResults(i); - const auto& blobRange = blobRanges[i]; - if (!blobRange.BlobId.IsSmallBlob()) { - FallbackDataSize -= blobRange.Size; - } - - Y_VERIFY(blobRange.BlobId.ToStringNew() == res.GetBlobRange().GetBlobId()); - Y_VERIFY(blobRange.Offset == res.GetBlobRange().GetOffset()); - Y_VERIFY(blobRange.Size == res.GetBlobRange().GetSize()); - ProcessSingleRangeResult(blobRange, readCookie, res.GetStatus(), res.GetData(), ctx); - } - - MakeReadRequests(ctx); - } - void InsertIntoCache(const TBlobRange& blobRange, TString data) { // Shrink the buffer if it has to much extra capacity if (data.capacity() > data.size() * 1.1) { diff --git a/ydb/core/tx/columnshard/blob_manager.cpp b/ydb/core/tx/columnshard/blob_manager.cpp index 1bda48bfe1a..c6ad1b82fa8 100644 --- a/ydb/core/tx/columnshard/blob_manager.cpp +++ b/ydb/core/tx/columnshard/blob_manager.cpp @@ -5,6 +5,7 @@ #include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> #include <ydb/core/base/blobstorage.h> +#include "blobs_action/bs/gc.h" namespace NKikimr::NColumnShard { @@ -29,7 +30,6 @@ struct TBlobBatch::TBatchInfo : TNonCopyable { std::vector<bool> InFlight; i32 InFlightCount; ui64 TotalSizeBytes; - std::vector<TString> SmallBlobs; TBatchInfo(TIntrusivePtr<TTabletStorageInfo> tabletInfo, TAllocatedGenStepConstPtr genStep, ui32 channel, const TBlobsManagerCounters& counters) : TabletInfo(tabletInfo) @@ -55,18 +55,6 @@ struct TBlobBatch::TBatchInfo : TNonCopyable { const ui32 dsGroup = TabletInfo->GroupFor(Channel, Gen); return TUnifiedBlobId(dsGroup, TLogoBlobID(TabletInfo->TabletID, Gen, Step, Channel, BlobSizes[i], i)); } - - TUnifiedBlobId AddSmallBlob(const TString& data) { - // NOTE: small blobs are not included into TotalSizeBytes - Counters.OnAddSmallBlob(data.size()); - SmallBlobs.push_back(data); - return MakeSmallBlobId(SmallBlobs.size() - 1); - } - - TUnifiedBlobId MakeSmallBlobId(ui32 i) const { - Y_VERIFY(i < SmallBlobs.size()); - return TUnifiedBlobId(TabletInfo->TabletID, Gen, Step, i, SmallBlobs[i].size()); - } }; TBlobBatch::TBlobBatch(std::unique_ptr<TBatchInfo> batchInfo) @@ -129,11 +117,6 @@ ui64 TBlobBatch::GetTotalSize() const { } -TUnifiedBlobId TBlobBatch::AddSmallBlob(const TString& data) { - Y_VERIFY(BatchInfo); - return BatchInfo->AddSmallBlob(data); -} - TUnifiedBlobId TBlobBatch::AllocateNextBlobId(const TString& blobData) { return BatchInfo->NextBlobId(blobData.size()); } @@ -166,10 +149,7 @@ bool TBlobManager::LoadState(IBlobManagerDb& db) { } for (const auto& unifiedBlobId : blobsToDelete) { - if (unifiedBlobId.IsSmallBlob()) { - BlobsManagerCounters.OnDeleteSmallBlob(unifiedBlobId.BlobSize()); - SmallBlobsToDelete.insert(unifiedBlobId); - } else if (unifiedBlobId.IsDsBlob()) { + if (unifiedBlobId.IsDsBlob()) { BlobsToDelete.insert(unifiedBlobId.GetLogoBlobId()); BlobsManagerCounters.OnDeleteBlobMarker(unifiedBlobId.BlobSize()); } else { @@ -185,14 +165,7 @@ bool TBlobManager::LoadState(IBlobManagerDb& db) { TLogoBlobID blobId = unifiedBlobId.GetLogoBlobId(); TGenStep genStep{blobId.Generation(), blobId.Step()}; - if (genStep <= LastCollectedGenStep) { - LOG_S_WARN("BlobManager at tablet " << TabletInfo->TabletID - << " Load not keeped blob " << unifiedBlobId << " collected by GenStep: " - << std::get<0>(LastCollectedGenStep) << ":" << std::get<1>(LastCollectedGenStep)); - BlobsManagerCounters.OnBrokenKeep(unifiedBlobId.BlobSize()); - KeepsToErase.emplace_back(unifiedBlobId); - continue; - } + Y_VERIFY(genStep > LastCollectedGenStep); BlobsToKeep.insert(blobId); BlobsManagerCounters.OnKeepMarker(blobId.BlobSize()); @@ -220,37 +193,6 @@ bool TBlobManager::LoadState(IBlobManagerDb& db) { return true; } -bool TBlobManager::CanCollectGarbage(bool cleanupOnly) const { - if (KeepsToErase.size() || DeletesToErase.size()) { - return true; - } - if (cleanupOnly) { - return false; - } - return NeedStorageCG(); -} - -bool TBlobManager::NeedStorageCG() const { - // Check that there is no GC request in flight - if (!PerGroupGCListsInFlight.empty()) { - return false; - } - - if (BlobsToKeep.empty() && BlobsToDelete.empty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { - return false; - } - - // Delay GC if there are to few blobs and last GC was not long ago - if ((i64)BlobsToKeep.size() < BlobCountToTriggerGC && - (i64)BlobsToDelete.size() < BlobCountToTriggerGC && - PreviousGCTime + TDuration::Seconds(GCIntervalSeconds) > AppData()->TimeProvider->Now()) - { - return false; - } - - return true; -} - TGenStep TBlobManager::FindNewGCBarrier() { TGenStep newCollectGenStep = LastCollectedGenStep; size_t numFinished = 0; @@ -273,23 +215,21 @@ TGenStep TBlobManager::FindNewGCBarrier() { return newCollectGenStep; } -THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager::PreparePerGroupGCRequests() { - if (!NeedStorageCG()) { - return {}; +std::shared_ptr<NOlap::NBlobOperations::NBlobStorage::TGCTask> TBlobManager::BuildGCTask(const TString& storageId, const std::shared_ptr<TBlobManager>& manager) { + if (BlobsToKeep.empty() && BlobsToDelete.empty() && LastCollectedGenStep == TGenStep{CurrentGen, CurrentStep}) { + ACFL_DEBUG("event", "TBlobManager::NeedStorageGC skip"); + return nullptr; } TGenStep newCollectGenStep = FindNewGCBarrier(); Y_VERIFY(newCollectGenStep >= LastCollectedGenStep); - if (newCollectGenStep == LastCollectedGenStep) { - return {}; - } PreviousGCTime = AppData()->TimeProvider->Now(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "PreparePerGroupGCRequests")("gen", std::get<0>(newCollectGenStep))("step", std::get<1>(newCollectGenStep)); BlobsManagerCounters.OnNewCollectStep(std::get<0>(newCollectGenStep), std::get<1>(newCollectGenStep)); const ui32 channelIdx = BLOB_CHANNEL; - Y_VERIFY(PerGroupGCListsInFlight.empty()); + NOlap::NBlobOperations::NBlobStorage::TGCTask::TGCListsByGroup perGroupGCListsInFlight; // Clear all possibly not keeped trash in channel's groups: create an event for each group if (FirstGC) { @@ -299,11 +239,13 @@ THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager: const auto& channelHistory = TabletInfo->ChannelInfo(channelIdx)->History; for (auto it = channelHistory.begin(); it != channelHistory.end(); ++it) { - PerGroupGCListsInFlight[it->GroupID]; + perGroupGCListsInFlight[it->GroupID]; } } // Make per-group Keep/DontKeep lists + std::deque<TUnifiedBlobId> keepsToErase; + std::deque<TUnifiedBlobId> deletesToErase; { // Add all blobs to keep auto keepBlobIt = BlobsToKeep.begin(); @@ -313,7 +255,7 @@ THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager: break; } ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt->Channel(), keepBlobIt->Generation()); - PerGroupGCListsInFlight[blobGroup].KeepList.insert(*keepBlobIt); + perGroupGCListsInFlight[blobGroup].KeepList.insert(*keepBlobIt); } BlobsToKeep.erase(BlobsToKeep.begin(), keepBlobIt); BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); @@ -326,11 +268,11 @@ THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager: break; } ui32 blobGroup = TabletInfo->GroupFor(blobIt->Channel(), blobIt->Generation()); - TGCLists& gl = PerGroupGCListsInFlight[blobGroup]; + NOlap::NBlobOperations::NBlobStorage::TGCTask::TGCLists& gl = perGroupGCListsInFlight[blobGroup]; bool skipDontKeep = false; if (gl.KeepList.erase(*blobIt)) { // Skipped blobs still need to be deleted from BlobsToKeep table - KeepsToErase.emplace_back(TUnifiedBlobId(blobGroup, *blobIt)); + keepsToErase.emplace_back(TUnifiedBlobId(blobGroup, *blobIt)); if (CurrentGen == blobIt->Generation()) { // If this blob was created and deleted in the current generation then @@ -339,7 +281,7 @@ THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager: // a scenario when Keep flag was sent in the old generation and then tablet restarted // before getting the result and removing the blob from the Keep list. skipDontKeep = true; - DeletesToErase.emplace_back(TUnifiedBlobId(blobGroup, *blobIt)); + deletesToErase.emplace_back(TUnifiedBlobId(blobGroup, *blobIt)); ++CountersUpdate.BlobSkippedEntries; } } @@ -355,104 +297,7 @@ THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager: } CollectGenStepInFlight = newCollectGenStep; - - // Make per group requests - THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> requests; - { - for (const auto& gl : PerGroupGCListsInFlight) { - ui32 group = gl.first; - for (auto&& i : gl.second.KeepList) { - BlobsManagerCounters.OnCollectKeep(i.BlobSize()); - } - requests[group] = std::make_unique<TEvBlobStorage::TEvCollectGarbage>( - TabletInfo->TabletID, CurrentGen, PerGenerationCounter, - channelIdx, true, - std::get<0>(CollectGenStepInFlight), std::get<1>(CollectGenStepInFlight), - new TVector<TLogoBlobID>(gl.second.KeepList.begin(), gl.second.KeepList.end()), - new TVector<TLogoBlobID>(gl.second.DontKeepList.begin(), gl.second.DontKeepList.end()), - TInstant::Max(), true); - - CounterToGroupInFlight[PerGenerationCounter] = group; - - PerGenerationCounter += requests[group]->PerGenerationCounterStepSize(); - } - } - - return requests; -} - -size_t TBlobManager::CleanupFlaggedBlobs(IBlobManagerDb& db, size_t maxBlobsToCleanup) { - if (KeepsToErase.empty() && DeletesToErase.empty()) { - return 0; - } - - size_t numBlobs = 0; - - for (; !KeepsToErase.empty() && numBlobs < maxBlobsToCleanup; ++numBlobs) { - db.EraseBlobToKeep(KeepsToErase.front()); - KeepsToErase.pop_front(); - } - - for (; !DeletesToErase.empty() && numBlobs < maxBlobsToCleanup; ++numBlobs) { - db.EraseBlobToDelete(DeletesToErase.front()); - DeletesToErase.pop_front(); - } - - Y_VERIFY(numBlobs <= maxBlobsToCleanup); - return numBlobs; -} - -void TBlobManager::OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev, IBlobManagerDb& db) { - Y_VERIFY(ev->Get()->Status == NKikimrProto::OK, "The caller must handle unsuccessful status"); - Y_VERIFY(!CounterToGroupInFlight.empty()); - Y_VERIFY(!PerGroupGCListsInFlight.empty()); - - // Find the group for this result - ui64 counterFromRequest = ev->Get()->PerGenerationCounter; - Y_VERIFY(CounterToGroupInFlight.contains(counterFromRequest)); - ui32 group = CounterToGroupInFlight[counterFromRequest]; - - auto it = PerGroupGCListsInFlight.find(group); - const auto& keepList = it->second.KeepList; - const auto& dontKeepList = it->second.DontKeepList; - - // NOTE: It clears blobs of different groups. - // It's expected to be safe cause we have GC result for the blobs or don't need such result. - size_t maxBlobsToCleanup = TLimits::MAX_BLOBS_TO_DELETE; - maxBlobsToCleanup -= CleanupFlaggedBlobs(db, maxBlobsToCleanup); - - size_t blobsToForget = keepList.size() + dontKeepList.size(); - - if (blobsToForget < maxBlobsToCleanup) { - for (const auto& blobId : keepList) { - db.EraseBlobToKeep(TUnifiedBlobId(group, blobId)); - } - for (const auto& blobId : dontKeepList) { - db.EraseBlobToDelete(TUnifiedBlobId(group, blobId)); - } - } else { - for (const auto& blobId : keepList) { - KeepsToErase.emplace_back(TUnifiedBlobId(group, blobId)); - } - for (const auto& blobId : dontKeepList) { - DeletesToErase.emplace_back(TUnifiedBlobId(group, blobId)); - } - } - - ++CountersUpdate.GcRequestsSent; - CountersUpdate.BlobKeepEntries += keepList.size(); - CountersUpdate.BlobDontKeepEntries += dontKeepList.size(); - - PerGroupGCListsInFlight.erase(it); - CounterToGroupInFlight.erase(group); - - // All requests done? - if (PerGroupGCListsInFlight.empty()) { - LastCollectedGenStep = CollectGenStepInFlight; - db.SaveLastGcBarrier(LastCollectedGenStep); - } - - PerformDelayedDeletes(db); + return std::make_shared<NOlap::NBlobOperations::NBlobStorage::TGCTask>(storageId, std::move(perGroupGCListsInFlight), newCollectGenStep, std::move(keepsToErase), std::move(deletesToErase), manager); } TBlobBatch TBlobManager::StartBlobBatch(ui32 channel) { @@ -483,9 +328,7 @@ void TBlobManager::DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) { auto logoblobId = blobId.GetLogoBlobId(); TGenStep genStep{logoblobId.Generation(), logoblobId.Step()}; - Y_VERIFY(genStep > edgeGenStep, - "Trying to keep blob %s that could be already collected by edge barrier (%" PRIu32 ":%" PRIu32 ")", - blobId.ToStringNew().c_str(), std::get<0>(edgeGenStep), std::get<1>(edgeGenStep)); + AFL_VERIFY(genStep > edgeGenStep)("gen_step", genStep)("edge_gen_step", edgeGenStep)("blob_id", blobId.ToStringNew()); BlobsManagerCounters.OnKeepMarker(logoblobId.BlobSize()); BlobsToKeep.insert(std::move(logoblobId)); @@ -493,50 +336,24 @@ void TBlobManager::DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) { } BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - // Save all small blobs - for (ui32 i = 0; i < blobBatch.BatchInfo->SmallBlobs.size(); ++i) { - const TUnifiedBlobId blobId = blobBatch.BatchInfo->MakeSmallBlobId(i); - LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Save Small Blob " << blobId); - db.WriteSmallBlob(blobId, blobBatch.BatchInfo->SmallBlobs[i]); - ++CountersUpdate.SmallBlobsWritten; - CountersUpdate.SmallBlobsBytesWritten += blobId.BlobSize(); - } - blobBatch.BatchInfo->GenStepRef.Reset(); } void TBlobManager::DeleteBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db) { - PerformDelayedDeletes(db); - ++CountersUpdate.BlobsDeleted; - if (blobId.IsSmallBlob()) { - BlobsManagerCounters.OnDeleteSmallBlob(blobId.BlobSize()); - if (BlobsUseCount.contains(blobId) == 0) { - DeleteSmallBlob(blobId, db); - } else { - LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Small Blob " << blobId); - db.AddBlobToDelete(blobId); - SmallBlobsToDeleteDelayed.insert(blobId); - } - return; - } - // Persist deletion intent db.AddBlobToDelete(blobId); // Check if the deletion needs to be delayed until the blob is no longer // used by in-flight requests - if (BlobsUseCount.contains(blobId) == 0) { + if (!IsBlobInUsage(blobId)) { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId); TLogoBlobID logoBlobId = blobId.GetLogoBlobId(); if (BlobsToDelete.emplace(logoBlobId).second) { BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize()); BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); } - if (!EvictedBlobs.contains(TEvictedBlob{.Blob = blobId})) { - NBlobCache::ForgetBlob(blobId); - } } else { BlobsManagerCounters.OnDeleteBlobDelayedMarker(blobId.BlobSize()); LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId); @@ -544,265 +361,22 @@ void TBlobManager::DeleteBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db) } } -bool TBlobManager::ExportOneToOne(TEvictedBlob&& evict, const NKikimrTxColumnShard::TEvictMetadata& meta, - IBlobManagerDb& db) -{ - if (EvictedBlobs.contains(evict) || DroppedEvictedBlobs.contains(evict)) { - return false; - } - - Y_VERIFY(!meta.GetTierName().empty()); - TString strMeta; - Y_PROTOBUF_SUPPRESS_NODISCARD meta.SerializeToString(&strMeta); - - db.UpdateEvictBlob(evict, strMeta); - EvictedBlobs.emplace(std::move(evict), meta); - return true; -} - -bool TBlobManager::DropOneToOne(const TUnifiedBlobId& blobId, IBlobManagerDb& db) { - NOlap::TEvictedBlob evict{ - .State = EEvictState::UNKNOWN, - .Blob = blobId - }; - - TEvictMetadata meta; - bool extracted = ExtractEvicted(evict, meta); - if (!extracted) { - LOG_S_DEBUG("Drop not exported blob '" << blobId << "' at tablet " << TabletInfo->TabletID); - return false; - } -#if 0 // TODO: SELF_CACHED logic - if (evict.State == EEvictState::SELF_CACHED) { - evict.State = EEvictState::EXTERN; // SELF_CACHED -> EXTERN for dropped - } -#endif - Y_VERIFY(!meta.GetTierName().empty()); - db.DropEvictBlob(evict); - DroppedEvictedBlobs.emplace(std::move(evict), std::move(meta)); - return true; -} - -bool TBlobManager::UpdateOneToOne(TEvictedBlob& evict, IBlobManagerDb& db, bool& dropped) { - TEvictMetadata meta; - - TEvictedBlob old{.Blob = evict.Blob}; - bool extracted = ExtractEvicted(old, meta); - dropped = false; - if (!extracted) { - dropped = DroppedEvictedBlobs.contains(evict); - if (!dropped) { - LOG_S_NOTICE("Update after forget '" << evict.Blob << "' at tablet " << TabletInfo->TabletID); - return false; - } - extracted = ExtractEvicted(old, meta, true); - } - Y_VERIFY(extracted); - Y_VERIFY(!meta.GetTierName().empty()); - - switch (evict.State) { - case EEvictState::EVICTING: - Y_FAIL(); - case EEvictState::SELF_CACHED: - Y_VERIFY(old.State == EEvictState::EVICTING); - break; - case EEvictState::EXTERN: - if (old.State != EEvictState::EVICTING && old.State != EEvictState::SELF_CACHED) { - LOG_S_ERROR("Unexpected update '" << evict.Blob << "' state " << (ui32) old.State - << " tier '" << meta.GetTierName() << "' at tablet " << TabletInfo->TabletID); - return false; - } - break; - default: - break; - } - - if (dropped) { - if (evict.State == EEvictState::SELF_CACHED) { - evict.State = EEvictState::EXTERN; // SELF_CACHED -> EXTERN for dropped - } - DroppedEvictedBlobs.emplace(evict, meta); - } else { - EvictedBlobs.emplace(evict, meta); - } - - // TODO: update meta if needed - db.UpdateEvictBlob(evict, {}); - return true; -} - -bool TBlobManager::EraseOneToOne(const TEvictedBlob& evict, IBlobManagerDb& db) { - Y_VERIFY(!EvictedBlobs.contains(evict)); // erase before drop - - if (DroppedEvictedBlobs.erase(evict)) { - db.EraseEvictBlob(evict); - return true; - } - return false; -} - -bool TBlobManager::LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) { - EvictedBlobs.clear(); - DroppedEvictedBlobs.clear(); - - TBlobGroupSelector dsGroupSelector(TabletInfo); - THashMap<TEvictedBlob, TString> evicted; - THashMap<TEvictedBlob, TString> dropped; - if (!db.LoadEvicted(evicted, dropped, dsGroupSelector)) { - return false; - } - - for (auto& [evict, metadata] : evicted) { - NKikimrTxColumnShard::TEvictMetadata meta; - Y_VERIFY(meta.ParseFromString(metadata)); - - EvictedBlobs.emplace(evict, meta); - } - - for (auto& [evict, metadata] : dropped) { - if (evict.IsEvicting()) { - droppedEvicting.insert(evict.Blob); - } - - NKikimrTxColumnShard::TEvictMetadata meta; - Y_VERIFY(meta.ParseFromString(metadata)); - Y_VERIFY(!meta.GetTierName().empty()); - - DroppedEvictedBlobs.emplace(evict, meta); - } - - return true; -} - -TEvictedBlob TBlobManager::GetEvicted(const TUnifiedBlobId& blobId, TEvictMetadata& meta) { - auto it = EvictedBlobs.find(TEvictedBlob{.Blob = blobId}); - if (it != EvictedBlobs.end()) { - meta = it->second; - return it->first; - } - return {}; +void TBlobManager::OnGCFinished(const TGenStep& genStep, IBlobManagerDb& db) { + LastCollectedGenStep = genStep; + db.SaveLastGcBarrier(LastCollectedGenStep); + CollectGenStepInFlight.reset(); } -TEvictedBlob TBlobManager::GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) { - auto it = DroppedEvictedBlobs.find(TEvictedBlob{.Blob = blobId}); - if (it != DroppedEvictedBlobs.end()) { - meta = it->second; - return it->first; - } - return {}; -} - -void TBlobManager::GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs, - const THashSet<TUnifiedBlobId>& allowList) const { - TStringBuilder strBlobs; - for (auto& [evict, meta] : DroppedEvictedBlobs) { - if (!allowList.empty() && !allowList.contains(evict.Blob)) { - continue; - } - if (evict.State != EEvictState::EVICTING) { - strBlobs << "'" << evict.Blob << "' "; - auto& tierName = meta.GetTierName(); - Y_VERIFY(!tierName.empty()); - tierBlobs[tierName].emplace(evict); - } - } - if (!strBlobs.empty()) { - LOG_S_DEBUG("Cleanup evicted blobs " << strBlobs << "at tablet " << TabletInfo->TabletID); - } -} - -void TBlobManager::GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const { - for (auto& [evict, meta] : EvictedBlobs) { - if (evict.State == EEvictState::EVICTING) { - auto& tierName = meta.GetTierName(); - tierBlobs[tierName].emplace(evict); - } - } -} - -void TBlobManager::DeleteSmallBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db) { - LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Small Blob " << blobId); - db.EraseSmallBlob(blobId); - NBlobCache::ForgetBlob(blobId); - ++CountersUpdate.SmallBlobsDeleted; - CountersUpdate.SmallBlobsBytesDeleted += blobId.BlobSize(); -} - -void TBlobManager::PerformDelayedDeletes(IBlobManagerDb& db) { - for (const auto& blobId : SmallBlobsToDelete) { - DeleteSmallBlob(blobId, db); - db.EraseBlobToDelete(blobId); - } - SmallBlobsToDelete.clear(); -} - -bool TBlobManager::BlobInUse(const NOlap::TUnifiedBlobId& blobId) const { - return BlobsUseCount.contains(blobId); -} - -bool TBlobManager::SetBlobInUse(const TUnifiedBlobId& blobId, bool inUse) { - if (inUse) { - BlobsUseCount[blobId]++; - return true; - } - - auto useIt = BlobsUseCount.find(blobId); - Y_VERIFY(useIt != BlobsUseCount.end(), "Trying to un-use an unknown blob %s", blobId.ToStringNew().c_str()); - --useIt->second; - - if (useIt->second > 0) { - // Blob is still in use - return false; - } - - LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Blob " << blobId << " is no longer in use"); - BlobsUseCount.erase(useIt); - +void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "blob_free")("blob_id", blobId); // Check if the blob is marked for delayed deletion - if (blobId.IsSmallBlob()) { - if (SmallBlobsToDeleteDelayed.erase(blobId)) { - LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delayed Small Blob " << blobId - << " is no longer in use" ); - SmallBlobsToDelete.insert(blobId); - BlobsManagerCounters.OnDeleteSmallBlob(blobId.BlobSize()); - } - } else { - TLogoBlobID logoBlobId = blobId.GetLogoBlobId(); - if (BlobsToDeleteDelayed.erase(logoBlobId)) { - LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Delayed Blob " << blobId); - BlobsToDelete.insert(logoBlobId); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); - BlobsManagerCounters.OnDeleteBlobMarker(logoBlobId.BlobSize()); - - if (!EvictedBlobs.contains(TEvictedBlob{.Blob = blobId})) { - NBlobCache::ForgetBlob(blobId); - } - } - } - return true; -} - -bool TBlobManager::ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped /*= false*/) { - if (fromDropped) { - if (DroppedEvictedBlobs.contains(evict)) { - auto node = DroppedEvictedBlobs.extract(evict); - if (!node.empty()) { - evict = node.key(); - meta = node.mapped(); - return true; - } - } - } else { - if (EvictedBlobs.contains(evict)) { - auto node = EvictedBlobs.extract(evict); - if (!node.empty()) { - evict = node.key(); - meta = node.mapped(); - return true; - } - } + const TLogoBlobID logoBlobId = blobId.GetLogoBlobId(); + if (BlobsToDeleteDelayed.erase(logoBlobId)) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("blob_id", blobId)("event", "blob_delayed_deleted"); + BlobsToDelete.insert(logoBlobId); + BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnDeleteBlobMarker(logoBlobId.BlobSize()); } - return false; } } diff --git a/ydb/core/tx/columnshard/blob_manager.h b/ydb/core/tx/columnshard/blob_manager.h index 24777f3a2bc..2bf7c3f5912 100644 --- a/ydb/core/tx/columnshard/blob_manager.h +++ b/ydb/core/tx/columnshard/blob_manager.h @@ -1,6 +1,8 @@ #pragma once #include "blob.h" +#include "blobs_action/blob_manager_db.h" +#include "blobs_action/abstract/storage.h" #include "counters/blobs_manager.h" #include <ydb/core/tablet_flat/flat_executor.h> @@ -9,6 +11,10 @@ #include <util/generic/string.h> +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { +class TGCTask; +} + namespace NKikimr::NColumnShard { using NOlap::TUnifiedBlobId; @@ -55,9 +61,6 @@ public: // Size of all blobs in the batch ui64 GetTotalSize() const; - - // Small blobs will be saved as rows in SmallBlobs local table when the batch gets saved - TUnifiedBlobId AddSmallBlob(const TString& data); }; class IBlobManagerDb; @@ -66,9 +69,9 @@ class IBlobManagerDb; // All garbage collection related logic is hidden inside the implementation. class IBlobManager { protected: - static constexpr ui32 BLOB_CHANNEL = 2; virtual void DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) = 0; public: + static constexpr ui32 BLOB_CHANNEL = 2; virtual ~IBlobManager() = default; // Allocates a temporary blob batch with the BlobManager. If the tablet crashes or if @@ -111,9 +114,6 @@ public: virtual bool HasExternBlobs() const = 0; }; -// Garbage Collection generation and step -using TGenStep = std::tuple<ui32, ui32>; - // A ref-counted object to keep track when GC barrier can be moved to some step. // This means that all needed blobs below this step have been KeepFlag-ed and Ack-ed struct TAllocatedGenStep : public TThrRefBase { @@ -140,14 +140,10 @@ struct TBlobManagerCounters { ui64 BlobDontKeepEntries = 0; ui64 BlobSkippedEntries = 0; ui64 GcRequestsSent = 0; - ui64 SmallBlobsWritten = 0; - ui64 SmallBlobsBytesWritten = 0; - ui64 SmallBlobsDeleted = 0; - ui64 SmallBlobsBytesDeleted = 0; }; // The implementation of BlobManager that hides all GC-related details -class TBlobManager : public IBlobManager, public IBlobExporter, public NOlap::IBlobInUseTracker { +class TBlobManager : public IBlobManager, public NOlap::TCommonBlobsTracker { private: static constexpr size_t BLOB_COUNT_TO_TRIGGER_GC_DEFAULT = 1000; static constexpr ui64 GC_INTERVAL_SECONDS_DEFAULT = 60; @@ -158,7 +154,7 @@ private: ui32 CurrentStep; TControlWrapper BlobCountToTriggerGC; TControlWrapper GCIntervalSeconds; - + std::optional<TGenStep> CollectGenStepInFlight; // Lists of blobs that need Keep flag to be set TSet<TLogoBlobID> BlobsToKeep; // Lists of blobs that need DoNotKeep flag to be set @@ -167,39 +163,13 @@ private: // List of blobs that are marked for deletion but are still used by in-flight requests TSet<TLogoBlobID> BlobsToDeleteDelayed; - // List of small blobs that are marked for deletion but are still used by in-flight requests - THashSet<TUnifiedBlobId> SmallBlobsToDeleteDelayed; - - // List of small blobs that that were in-use when DeleteBlob was called and are no longer in-use - // Now they can now be deleted - THashSet<TUnifiedBlobId> SmallBlobsToDelete; - - // List of blobs that are used by in-flight requests - THashMap<TUnifiedBlobId, i64> BlobsUseCount; - // Sorted queue of GenSteps that have in-flight BlobBatches TDeque<TAllocatedGenStepConstPtr> AllocatedGenSteps; // The Gen:Step that has been acknowledged by the Distributed Storage TGenStep LastCollectedGenStep = {0, 0}; - // Distributed Storage requires a monotonically increasing counter for GC requests - ui64 PerGenerationCounter = 1; - - // GC requests that are currently in-flight: they have been - // sent to Distributed Storage and we are waiting for the results - struct TGCLists { - THashSet<TLogoBlobID> KeepList; - THashSet<TLogoBlobID> DontKeepList; - }; - THashMap<ui32, TGCLists> PerGroupGCListsInFlight; - // NOTE: blobs still need to be removed from local db - TDeque<TUnifiedBlobId> KeepsToErase; - TDeque<TUnifiedBlobId> DeletesToErase; - // Maps PerGenerationCounter value to the group in PerGroupGCListsInFlight - THashMap<ui64, ui32> CounterToGroupInFlight; // The barrier in the current in-flight GC request(s) - TGenStep CollectGenStepInFlight = {0, 0}; bool FirstGC = true; const TBlobsManagerCounters BlobsManagerCounters = TBlobsManagerCounters("BlobsManager"); @@ -208,37 +178,37 @@ private: // Then the counters are reset and start accumulating new delta TBlobManagerCounters CountersUpdate; - TInstant PreviousGCTime; // Used for delaying next GC if there are too few blobs to collect + ui64 PerGenerationCounter = 1; - // - std::unordered_map<TEvictedBlob, TEvictMetadata, THash<NKikimr::NOlap::TEvictedBlob>> EvictedBlobs; - std::unordered_map<TEvictedBlob, TEvictMetadata, THash<NKikimr::NOlap::TEvictedBlob>> DroppedEvictedBlobs; + TInstant PreviousGCTime; // Used for delaying next GC if there are too few blobs to collect virtual void DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) override; - public: TBlobManager(TIntrusivePtr<TTabletStorageInfo> tabletInfo, ui32 gen); + virtual void OnBlobFree(const TUnifiedBlobId& blobId) override; + const TBlobsManagerCounters& GetCounters() const { return BlobsManagerCounters; } + ui64 GetTabletId() const { + return TabletInfo->TabletID; + } + + ui64 GetCurrentGen() const { + return CurrentGen; + } + void RegisterControls(NKikimr::TControlBoard& icb); // Loads the state at startup bool LoadState(IBlobManagerDb& db); - bool CanCollectGarbage(bool cleanupOnly = false) const; - bool NeedStorageCG() const; - // Prepares Keep/DontKeep lists and GC barrier - THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> PreparePerGroupGCRequests(); - - // Cleanup blobs that have correct flags (skipped or already marked with correct flags) - size_t CleanupFlaggedBlobs(IBlobManagerDb& db, size_t maxBlobsToCleanup); + std::shared_ptr<NOlap::NBlobOperations::NBlobStorage::TGCTask> BuildGCTask(const TString& storageId, const std::shared_ptr<TBlobManager>& manager); - // Called with GC result received from Distributed Storage - void OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev, IBlobManagerDb& db); + void OnGCFinished(const TGenStep& genStep, IBlobManagerDb& db); TBlobManagerCounters GetCountersUpdate() { TBlobManagerCounters res = CountersUpdate; @@ -249,38 +219,13 @@ public: // Implementation of IBlobManager interface TBlobBatch StartBlobBatch(ui32 channel = BLOB_CHANNEL) override; void DeleteBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db) override; - - // Implementation of IBlobExporter - bool ExportOneToOne(TEvictedBlob&& evict, const TEvictMetadata& meta, IBlobManagerDb& db) override; - bool DropOneToOne(const TUnifiedBlobId& blob, IBlobManagerDb& db) override; - bool UpdateOneToOne(TEvictedBlob& evict, IBlobManagerDb& db, bool& dropped) override; - bool EraseOneToOne(const TEvictedBlob& evict, IBlobManagerDb& db) override; - bool LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) override; - TEvictedBlob GetEvicted(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override; - TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override; - void GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs, - const THashSet<TUnifiedBlobId>& allowList = {}) const override; - void GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const override; - - bool HasExternBlobs() const override { - return EvictedBlobs.size() || DroppedEvictedBlobs.size(); - } - - // Implementation of IBlobInUseTracker - bool SetBlobInUse(const TUnifiedBlobId& blobId, bool inUse) override; - bool BlobInUse(const NOlap::TUnifiedBlobId& blobId) const override; - private: TGenStep FindNewGCBarrier(); - void DeleteSmallBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db); - - // Delete small blobs that were previously in use and could not be deleted - void PerformDelayedDeletes(IBlobManagerDb& db); bool ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped = false); TGenStep EdgeGenStep() const { - return (CollectGenStepInFlight == TGenStep{0, 0}) ? LastCollectedGenStep : CollectGenStepInFlight; + return CollectGenStepInFlight ? *CollectGenStepInFlight : LastCollectedGenStep; } }; diff --git a/ydb/core/tx/columnshard/blob_manager_txs.cpp b/ydb/core/tx/columnshard/blob_manager_txs.cpp deleted file mode 100644 index f3548cff1cc..00000000000 --- a/ydb/core/tx/columnshard/blob_manager_txs.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "defs.h" -#include "columnshard_impl.h" -#include "blob_manager.h" - -#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> - -#include <ydb/core/base/blobstorage.h> - -namespace NKikimr::NColumnShard { - -// Run GC related logic of the BlobManager -class TTxRunGC : public NTabletFlatExecutor::TTransactionBase<TColumnShard> { - THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> Requests; - bool Cleanup = false; - -public: - TTxRunGC(TColumnShard* self) - : TBase(self) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { - LOG_S_TRACE("TTxRunGC.Execute at tablet " << Self->TabletID()); - Y_UNUSED(ctx); - - // Cleanup delayed blobs before next GC - TBlobManagerDb blobManagerDb(txc.DB); - if (Self->BlobManager->CleanupFlaggedBlobs(blobManagerDb, TLimits::MAX_BLOBS_TO_DELETE)) { - Cleanup = true; - return true; - } - - Requests = Self->BlobManager->PreparePerGroupGCRequests(); - return true; - } - - void Complete(const TActorContext& ctx) override { - LOG_S_TRACE("TTxRunGC.Complete at tablet " << Self->TabletID()); - - /// @warning it's a loop Complete -> Execute. We must exit from it fo sure. - if (Cleanup) { - Self->ScheduleNextGC(ctx, true); - } - - for (auto& r : Requests) { - ui32 groupId = r.first; - auto ev = std::move(r.second); - LOG_S_DEBUG("BlobManager at tablet " << Self->TabletID() - << " Sending GC to group " << groupId << ": " << ev->Print(true)); - - SendToBSProxy(ctx, groupId, ev.release()); - } - } -}; - -ITransaction* TColumnShard::CreateTxRunGc() { - return new TTxRunGC(this); -} - - -// Update the BlobManager with the GC result -class TTxProcessGCResult : public NTabletFlatExecutor::TTransactionBase<TColumnShard> { - TEvBlobStorage::TEvCollectGarbageResult::TPtr Ev; -public: - TTxProcessGCResult(TColumnShard* self, TEvBlobStorage::TEvCollectGarbageResult::TPtr& ev) - : TBase(self) - , Ev(ev) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { - if (Ev->Get()->Status != NKikimrProto::OK) { - LOG_S_WARN("BlobManager at tablet " << Self->TabletID() - << " GC Failed: " << Ev->Get()->Print(true)); - Self->BecomeBroken(ctx); - return true; - } - - LOG_S_DEBUG("BlobManager at tablet " << Self->TabletID() - << " GC Result: " << Ev->Get()->Print(true)); - - // Update Keep/DontKeep lists and last GC barrier - TBlobManagerDb blobManagerDb(txc.DB); - Self->BlobManager->OnGCResult(Ev, blobManagerDb); - return true; - } - - void Complete(const TActorContext& ctx) override { - Self->ScheduleNextGC(ctx); - } -}; - -void TColumnShard::Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr& ev, const TActorContext& ctx) { - Execute(new TTxProcessGCResult(this, ev), ctx); -} - -} diff --git a/ydb/core/tx/columnshard/blobs_action/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/CMakeLists.darwin-x86_64.txt index 1558e932a75..de7066a8421 100644 --- a/ydb/core/tx/columnshard/blobs_action/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/blobs_action/CMakeLists.darwin-x86_64.txt @@ -6,6 +6,10 @@ # original buildsystem will not be accepted. +add_subdirectory(abstract) +add_subdirectory(bs) +add_subdirectory(tier) +add_subdirectory(transaction) add_library(tx-columnshard-blobs_action) target_link_libraries(tx-columnshard-blobs_action PUBLIC @@ -15,9 +19,12 @@ target_link_libraries(tx-columnshard-blobs_action PUBLIC libs-apache-arrow ydb-core-tablet_flat core-tx-tiering + columnshard-blobs_action-bs + columnshard-blobs_action-abstract + columnshard-blobs_action-transaction + columnshard-blobs_action-tier ) target_sources(tx-columnshard-blobs_action PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/memory.cpp ) diff --git a/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-aarch64.txt index bed25b4c44c..8e9f9f0fe85 100644 --- a/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-aarch64.txt @@ -6,6 +6,10 @@ # original buildsystem will not be accepted. +add_subdirectory(abstract) +add_subdirectory(bs) +add_subdirectory(tier) +add_subdirectory(transaction) add_library(tx-columnshard-blobs_action) target_link_libraries(tx-columnshard-blobs_action PUBLIC @@ -16,9 +20,12 @@ target_link_libraries(tx-columnshard-blobs_action PUBLIC libs-apache-arrow ydb-core-tablet_flat core-tx-tiering + columnshard-blobs_action-bs + columnshard-blobs_action-abstract + columnshard-blobs_action-transaction + columnshard-blobs_action-tier ) target_sources(tx-columnshard-blobs_action PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/memory.cpp ) diff --git a/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-x86_64.txt index bed25b4c44c..8e9f9f0fe85 100644 --- a/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/blobs_action/CMakeLists.linux-x86_64.txt @@ -6,6 +6,10 @@ # original buildsystem will not be accepted. +add_subdirectory(abstract) +add_subdirectory(bs) +add_subdirectory(tier) +add_subdirectory(transaction) add_library(tx-columnshard-blobs_action) target_link_libraries(tx-columnshard-blobs_action PUBLIC @@ -16,9 +20,12 @@ target_link_libraries(tx-columnshard-blobs_action PUBLIC libs-apache-arrow ydb-core-tablet_flat core-tx-tiering + columnshard-blobs_action-bs + columnshard-blobs_action-abstract + columnshard-blobs_action-transaction + columnshard-blobs_action-tier ) target_sources(tx-columnshard-blobs_action PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/memory.cpp ) diff --git a/ydb/core/tx/columnshard/blobs_action/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/CMakeLists.windows-x86_64.txt index 1558e932a75..f1469f8ee49 100644 --- a/ydb/core/tx/columnshard/blobs_action/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/blobs_action/CMakeLists.windows-x86_64.txt @@ -6,8 +6,14 @@ # original buildsystem will not be accepted. +add_subdirectory(abstract) +add_subdirectory(bs) +add_subdirectory(transaction) add_library(tx-columnshard-blobs_action) +target_compile_options(tx-columnshard-blobs_action PRIVATE + -DKIKIMR_DISABLE_S3_OPS +) target_link_libraries(tx-columnshard-blobs_action PUBLIC contrib-libs-cxxsupp yutil @@ -15,9 +21,11 @@ target_link_libraries(tx-columnshard-blobs_action PUBLIC libs-apache-arrow ydb-core-tablet_flat core-tx-tiering + columnshard-blobs_action-bs + columnshard-blobs_action-abstract + columnshard-blobs_action-transaction ) target_sources(tx-columnshard-blobs_action PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/memory.cpp ) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..124be7647cf --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-abstract) +target_link_libraries(columnshard-blobs_action-abstract PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-abstract PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..88d4d58e34e --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.linux-aarch64.txt @@ -0,0 +1,29 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-abstract) +target_link_libraries(columnshard-blobs_action-abstract PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-abstract PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..88d4d58e34e --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.linux-x86_64.txt @@ -0,0 +1,29 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-abstract) +target_link_libraries(columnshard-blobs_action-abstract PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-abstract PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.txt b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.txt new file mode 100644 index 00000000000..f8b31df0c11 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..124be7647cf --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/CMakeLists.windows-x86_64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-abstract) +target_link_libraries(columnshard-blobs_action-abstract PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-abstract PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp new file mode 100644 index 00000000000..5e860f338fa --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp @@ -0,0 +1,18 @@ +#include "action.h" +#include <ydb/core/tx/columnshard/engines/portions/portion_info.h> + +namespace NKikimr::NOlap { + +std::shared_ptr<NKikimr::NOlap::IBlobsWritingAction> TBlobsAction::GetWriting(const TPortionInfo& portionInfo) { + return GetStorageAction(portionInfo.GetBlobsStorage()->GetStorageId()).GetWriting(); +} + +std::shared_ptr<NKikimr::NOlap::IBlobsReadingAction> TBlobsAction::GetReading(const TPortionInfo& portionInfo) { + return GetStorageAction(portionInfo.GetBlobsStorage()->GetStorageId()).GetReading(); +} + +std::shared_ptr<NKikimr::NOlap::IBlobsDeclareRemovingAction> TBlobsAction::GetRemoving(const TPortionInfo& portionInfo) { + return GetStorageAction(portionInfo.GetBlobsStorage()->GetStorageId()).GetRemoving(); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/action.h b/ydb/core/tx/columnshard/blobs_action/abstract/action.h new file mode 100644 index 00000000000..95c48dedb8a --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/action.h @@ -0,0 +1,177 @@ +#pragma once +#include "storage.h" +#include "remove.h" +#include "write.h" +#include "read.h" +#include "storages_manager.h" + +namespace NKikimr::NOlap { + +struct TPortionInfo; + +class TStorageAction { +private: + std::shared_ptr<IBlobsStorageOperator> Storage; + std::shared_ptr<IBlobsDeclareRemovingAction> Removing; + std::shared_ptr<IBlobsWritingAction> Writing; + std::shared_ptr<IBlobsReadingAction> Reading; + +public: + TStorageAction(const std::shared_ptr<IBlobsStorageOperator>& storage) + : Storage(storage) { + + } + + const std::shared_ptr<IBlobsDeclareRemovingAction>& GetRemoving() { + if (!Removing) { + Removing = Storage->StartDeclareRemovingAction(); + } + return Removing; + } + const std::shared_ptr<IBlobsWritingAction>& GetWriting() { + if (!Writing) { + Writing = Storage->StartWritingAction(); + } + return Writing; + } + const std::shared_ptr<IBlobsWritingAction>& GetWritingOptional() const { + return Writing; + } + const std::shared_ptr<IBlobsReadingAction>& GetReading() { + if (!Reading) { + Reading = Storage->StartReadingAction(); + } + return Reading; + } + + std::shared_ptr<IBlobsReadingAction> GetReadingOptional() const { + return Reading; + } + + bool HasReading() const { + return !!Reading; + } + bool HasWriting() const { + return !!Writing; + } + + void OnExecuteTxAfterAction(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) { + if (Removing) { + Removing->OnExecuteTxAfterRemoving(self, dbBlobs, success); + } + if (Writing) { + Writing->OnExecuteTxAfterWrite(self, dbBlobs, success); + } + } + + void OnCompleteTxAfterAction(NColumnShard::TColumnShard& self) { + if (Removing) { + Removing->OnCompleteTxAfterRemoving(self); + } + if (Writing) { + Writing->OnCompleteTxAfterWrite(self); + } + } +}; + +class TBlobsAction { +private: + std::shared_ptr<IStoragesManager> Storages; + THashMap<TString, TStorageAction> StorageActions; + + TStorageAction& GetStorageAction(const TString& storageId) { + auto it = StorageActions.find(storageId); + if (it == StorageActions.end()) { + it = StorageActions.emplace(storageId, Storages->GetOperator(storageId)).first; + } + return it->second; + } +public: + TBlobsAction(std::shared_ptr<IStoragesManager> storages) + : Storages(storages) + { + + } + + ui32 GetWritingBlobsCount() const { + ui32 result = 0; + for (auto&& [_, action] : StorageActions) { + if (!!action.GetWritingOptional()) { + result += action.GetWritingOptional()->GetBlobsCount(); + } + } + return result; + } + + ui64 GetWritingTotalSize() const { + ui64 result = 0; + for (auto&& [_, action] : StorageActions) { + if (!!action.GetWritingOptional()) { + result += action.GetWritingOptional()->GetTotalSize(); + } + } + return result; + } + + std::vector<std::shared_ptr<IBlobsReadingAction>> GetReadingActions() const { + std::vector<std::shared_ptr<IBlobsReadingAction>> result; + for (auto&& i : StorageActions) { + if (i.second.HasReading()) { + result.emplace_back(i.second.GetReadingOptional()); + } + } + return result; + } + + std::vector<std::shared_ptr<IBlobsWritingAction>> GetWritingActions() const { + std::vector<std::shared_ptr<IBlobsWritingAction>> result; + for (auto&& i : StorageActions) { + if (i.second.HasWriting()) { + result.emplace_back(i.second.GetWritingOptional()); + } + } + return result; + } + + bool NeedDraftWritingTransaction() const { + for (auto&& i : GetWritingActions()) { + if (i->NeedDraftTransaction()) { + return true; + } + } + return false; + } + + void OnExecuteTxAfterAction(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) { + for (auto&& i : StorageActions) { + i.second.OnExecuteTxAfterAction(self, dbBlobs, success); + } + } + + void OnCompleteTxAfterAction(NColumnShard::TColumnShard& self) { + for (auto&& i : StorageActions) { + i.second.OnCompleteTxAfterAction(self); + } + } + + std::shared_ptr<IBlobsDeclareRemovingAction> GetRemoving(const TString& storageId) { + return GetStorageAction(storageId).GetRemoving(); + } + + std::shared_ptr<IBlobsDeclareRemovingAction> GetRemoving(const TPortionInfo& portionInfo); + + std::shared_ptr<IBlobsWritingAction> GetWriting(const TString& storageId) { + return GetStorageAction(storageId).GetWriting(); + } + + std::shared_ptr<IBlobsWritingAction> GetWriting(const TPortionInfo& portionInfo); + + std::shared_ptr<IBlobsReadingAction> GetReading(const TString& storageId) { + return GetStorageAction(storageId).GetReading(); + } + + std::shared_ptr<IBlobsReadingAction> GetReading(const TPortionInfo& portionInfo); + +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp new file mode 100644 index 00000000000..d8dfaff298e --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/common.cpp @@ -0,0 +1,15 @@ +#include "common.h" +#include <util/generic/refcount.h> + +namespace NKikimr::NOlap { + +namespace { +static TAtomicCounter ActionIdCounter = 0; +} +ICommonBlobsAction::ICommonBlobsAction(const TString& storageId) + : StorageId(storageId) + , ActionId(ActionIdCounter.Inc()) +{ +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/common.h b/ydb/core/tx/columnshard/blobs_action/abstract/common.h new file mode 100644 index 00000000000..65e1dcf1fe5 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/common.h @@ -0,0 +1,21 @@ +#pragma once +#include <util/system/types.h> +#include <ydb/library/accessor/accessor.h> +#include <util/generic/string.h> + +namespace NKikimr::NOlap { + +class ICommonBlobsAction { +private: + YDB_READONLY_DEF(TString, StorageId); + const i64 ActionId = 0; +public: + i64 GetActionId() const { + return ActionId; + } + + ICommonBlobsAction(const TString& storageId); + virtual ~ICommonBlobsAction() = default; +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp new file mode 100644 index 00000000000..5ade97e6d66 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp @@ -0,0 +1,11 @@ +#include "gc.h" +#include <ydb/core/tx/columnshard/columnshard_impl.h> + +namespace NKikimr::NOlap { + +void IBlobsGCAction::OnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr<IBlobsGCAction>& taskAction) { + self.GetStoragesManager()->GetOperator(GetStorageId())->FinishGC(); + return DoOnCompleteTxAfterCleaning(self, taskAction); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h new file mode 100644 index 00000000000..fa4a462b698 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h @@ -0,0 +1,32 @@ +#pragma once +#include "common.h" +#include <util/generic/string.h> + +namespace NKikimr::NColumnShard { +class TColumnShard; +class TBlobManagerDb; +} + +namespace NKikimr::NOlap { + +class IBlobsGCAction: public ICommonBlobsAction { +private: + using TBase = ICommonBlobsAction; +protected: + + virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) = 0; + virtual void DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr<IBlobsGCAction>& taskAction) = 0; +public: + void OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) { + return DoOnExecuteTxAfterCleaning(self, dbBlobs); + } + void OnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr<IBlobsGCAction>& taskAction); + + IBlobsGCAction(const TString& storageId) + : TBase(storageId) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp new file mode 100644 index 00000000000..0f8dd0161ce --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp @@ -0,0 +1,14 @@ +#include "read.h" +#include <library/cpp/actors/core/log.h> + +namespace NKikimr::NOlap { + +void IBlobsReadingAction::StartReading(THashMap<TUnifiedBlobId, THashSet<TBlobRange>>&& ranges) { + AFL_VERIFY(ranges.size()); + for (auto&& i : ranges) { + AFL_VERIFY(i.second.size()); + } + return DoStartReading(ranges); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/read.h b/ydb/core/tx/columnshard/blobs_action/abstract/read.h new file mode 100644 index 00000000000..3cc93ce60b7 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/read.h @@ -0,0 +1,107 @@ +#pragma once +#include "common.h" +#include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/protos/base.pb.h> +#include <util/generic/hash_set.h> + +namespace NKikimr::NOlap { + +class IBlobsReadingAction: public ICommonBlobsAction { +private: + using TBase = ICommonBlobsAction; + THashMap<TUnifiedBlobId, THashSet<TBlobRange>> RangesForRead; + THashSet<TBlobRange> WaitingRanges; + THashMap<TBlobRange, TString> Replies; + THashMap<TBlobRange, NKikimrProto::EReplyStatus> Fails; + bool Started = false; +protected: + virtual void DoStartReading(const THashMap<TUnifiedBlobId, THashSet<TBlobRange>>& range) = 0; + void StartReading(THashMap<TUnifiedBlobId, THashSet<TBlobRange>>&& ranges); +public: + IBlobsReadingAction(const TString& storageId) + : TBase(storageId) + { + + } + + ui64 GetExpectedBlobsSize() const { + ui64 result = 0; + for (auto&& i : RangesForRead) { + for (auto&& b : i.second) { + result += b.Size; + } + } + return result; + } + + ui64 GetExpectedBlobsCount() const { + ui64 result = 0; + for (auto&& i : RangesForRead) { + result += i.second.size(); + } + return result; + } + + void FillExpectedRanges(THashSet<TBlobRange>& ranges) const { + for (auto&& i : RangesForRead) { + for (auto&& b : i.second) { + Y_VERIFY(ranges.emplace(b).second); + } + } + } + + const THashMap<TUnifiedBlobId, THashSet<TBlobRange>>& GetRangesForRead() const { + return RangesForRead; + } + + void AddRange(const TBlobRange& range) { + Y_VERIFY(!Started); + Y_VERIFY(RangesForRead[range.BlobId].emplace(range).second); + } + + void Start(const THashSet<TBlobRange>& rangesInProgress) { + Y_VERIFY(!Started); + Y_VERIFY(RangesForRead.size()); + for (auto&& i : RangesForRead) { + for (auto&& r : i.second) { + WaitingRanges.emplace(r); + } + } + THashMap<TUnifiedBlobId, THashSet<TBlobRange>> rangesFiltered; + if (rangesInProgress.empty()) { + rangesFiltered = RangesForRead; + } else { + for (auto&& i : RangesForRead) { + for (auto&& r : i.second) { + if (!rangesInProgress.contains(r)) { + rangesFiltered[r.BlobId].emplace(r); + } + } + } + } + if (rangesFiltered.size()) { + StartReading(std::move(rangesFiltered)); + } + Started = true; + } + + void OnReadResult(const TBlobRange& range, const TString& data) { + Y_VERIFY(WaitingRanges.erase(range)); + Replies.emplace(range, data); + } + + void OnReadError(const TBlobRange& range, const NKikimrProto::EReplyStatus replyStatus) { + Y_VERIFY(WaitingRanges.erase(range)); + Fails.emplace(range, replyStatus); + } + + bool HasFails() const { + return Fails.size(); + } + + bool IsFinished() const { + return WaitingRanges.size() == 0; + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp new file mode 100644 index 00000000000..47c75900b34 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp @@ -0,0 +1,13 @@ +#include "remove.h" +#include <library/cpp/actors/core/log.h> + +namespace NKikimr::NOlap { + +void IBlobsDeclareRemovingAction::DeclareRemove(const TUnifiedBlobId& blobId) { + if (DeclaredBlobs.emplace(blobId).second) { + ACFL_DEBUG("event", "DeclareRemove")("blob_id", blobId); + return DoDeclareRemove(blobId); + } +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/remove.h b/ydb/core/tx/columnshard/blobs_action/abstract/remove.h new file mode 100644 index 00000000000..85b092f5de3 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/remove.h @@ -0,0 +1,38 @@ +#pragma once +#include "common.h" +#include <util/generic/hash_set.h> +#include <ydb/core/tx/columnshard/blob.h> +#include <ydb/library/accessor/accessor.h> + +namespace NKikimr::NColumnShard { +class TColumnShard; +class TBlobManagerDb; +} + +namespace NKikimr::NOlap { + +class IBlobsDeclareRemovingAction: public ICommonBlobsAction { +private: + using TBase = ICommonBlobsAction; + YDB_READONLY_DEF(THashSet<TUnifiedBlobId>, DeclaredBlobs); +protected: + virtual void DoDeclareRemove(const TUnifiedBlobId& blobId) = 0; + virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) = 0; + virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& self) = 0; +public: + IBlobsDeclareRemovingAction(const TString& storageId) + : TBase(storageId) + { + + } + + void DeclareRemove(const TUnifiedBlobId& blobId); + void OnExecuteTxAfterRemoving(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) { + return DoOnExecuteTxAfterRemoving(self, dbBlobs, success); + } + void OnCompleteTxAfterRemoving(NColumnShard::TColumnShard& self) { + return DoOnCompleteTxAfterRemoving(self); + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp new file mode 100644 index 00000000000..501e6a82487 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp @@ -0,0 +1,37 @@ +#include "storage.h" + +namespace NKikimr::NOlap { + +bool TCommonBlobsTracker::IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const { + return BlobsUseCount.contains(blobId); +} + +bool TCommonBlobsTracker::DoUseBlob(const TUnifiedBlobId& blobId) { + auto it = BlobsUseCount.find(blobId); + if (it == BlobsUseCount.end()) { + BlobsUseCount.emplace(blobId, 1); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("method", "DoUseBlob")("blob_id", blobId)("count", 1); + return true; + } else { + ++it->second; + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("method", "DoUseBlob")("blob_id", blobId)("count", it->second); + return false; + } +} + +bool TCommonBlobsTracker::DoFreeBlob(const TUnifiedBlobId& blobId) { + auto useIt = BlobsUseCount.find(blobId); + AFL_VERIFY(useIt != BlobsUseCount.end())("reason", "Unknown blob")("blob_id", blobId.ToStringNew()); + AFL_VERIFY(useIt->second); + --useIt->second; + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("method", "DoFreeBlob")("blob_id", blobId)("count", useIt->second); + + if (useIt->second > 0) { + return false; + } + BlobsUseCount.erase(useIt); + OnBlobFree(blobId); + return true; +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storage.h b/ydb/core/tx/columnshard/blobs_action/abstract/storage.h new file mode 100644 index 00000000000..6e2ad258dc0 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storage.h @@ -0,0 +1,86 @@ +#pragma once +#include "remove.h" +#include "write.h" +#include "read.h" + +#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> +#include <ydb/library/accessor/accessor.h> + +namespace NKikimr::NColumnShard { +class TTiersManager; +} + +namespace NKikimr::NOlap { + +class TCommonBlobsTracker: public IBlobInUseTracker { +private: + // List of blobs that are used by in-flight requests + THashMap<TUnifiedBlobId, i64> BlobsUseCount; +protected: + virtual bool DoUseBlob(const TUnifiedBlobId& blobId) override; + virtual bool DoFreeBlob(const TUnifiedBlobId& blobId) override; +public: + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const override; + virtual void OnBlobFree(const TUnifiedBlobId& blobId) = 0; +}; + +class IBlobsStorageOperator { +private: + YDB_READONLY_DEF(TString, StorageId); + friend class IBlobsGCAction; + bool GCActivity = false; + + void FinishGC() { + Y_VERIFY(GCActivity); + GCActivity = false; + } +protected: + virtual std::shared_ptr<IBlobsDeclareRemovingAction> DoStartDeclareRemovingAction() = 0; + virtual std::shared_ptr<IBlobsWritingAction> DoStartWritingAction() = 0; + virtual std::shared_ptr<IBlobsReadingAction> DoStartReadingAction() = 0; + virtual bool DoStartGC() = 0; + virtual bool DoLoad(NColumnShard::IBlobManagerDb& dbBlobs) = 0; + + virtual void DoOnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& tiers) = 0; + virtual TString DoDebugString() const { + return ""; + } +public: + IBlobsStorageOperator(const TString& storageId) + : StorageId(storageId) { + + } + + virtual std::shared_ptr<IBlobInUseTracker> GetBlobsTracker() const = 0; + + virtual ~IBlobsStorageOperator() = default; + + TString DebugString() const { + return TStringBuilder() << "(storage_id=" << StorageId << ";details=(" << DoDebugString() << "))"; + } + + bool Load(NColumnShard::IBlobManagerDb& dbBlobs) { + return DoLoad(dbBlobs); + } + void OnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& tiers) { + return DoOnTieringModified(tiers); + } + std::shared_ptr<IBlobsDeclareRemovingAction> StartDeclareRemovingAction() { + return DoStartDeclareRemovingAction(); + } + std::shared_ptr<IBlobsWritingAction> StartWritingAction() { + return DoStartWritingAction(); + } + std::shared_ptr<IBlobsReadingAction> StartReadingAction() { + return DoStartReadingAction(); + } + bool StartGC() { + if (!GCActivity) { + GCActivity = DoStartGC(); + return GCActivity; + } + return false; + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp new file mode 100644 index 00000000000..5d3178d6f70 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp @@ -0,0 +1,41 @@ +#include "storages_manager.h" +#include <ydb/core/tx/columnshard/engines/portions/portion_info.h> +#include <ydb/core/tx/tiering/manager.h> + +namespace NKikimr::NOlap { + +std::shared_ptr<NKikimr::NOlap::IBlobsStorageOperator> IStoragesManager::GetOperator(const TString& storageId) { + TReadGuard rg(RWMutex); + auto it = Constructed.find(storageId); + if (it == Constructed.end()) { + rg.Release(); + TWriteGuard wg(RWMutex); + it = Constructed.find(storageId); + if (it == Constructed.end()) { + it = Constructed.emplace(storageId, BuildOperator(storageId)).first; + } + return it->second; + } + return it->second; +} + +std::shared_ptr<IBlobsStorageOperator> IStoragesManager::InitializePortionOperator(const TPortionInfo& portionInfo) { + Y_VERIFY(!portionInfo.HasStorageOperator()); + if (portionInfo.GetMeta().GetTierName()) { + return GetOperator(portionInfo.GetMeta().GetTierName()); + } else { + return GetOperator(DefaultStorageId); + } +} + +void IStoragesManager::OnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& tiers) { + for (auto&& i : tiers->GetManagers()) { + GetOperator(i.second.GetTierName())->OnTieringModified(tiers); + } +} + +void IStoragesManager::InitializeNecessaryStorages() { + GetOperator(DefaultStorageId); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h new file mode 100644 index 00000000000..7c52f4c7987 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h @@ -0,0 +1,47 @@ +#pragma once +#include "storage.h" + +namespace NKikimr::NOlap { + +struct TPortionInfo; + +class IStoragesManager { +private: + TRWMutex RWMutex; +protected: + virtual std::shared_ptr<IBlobsStorageOperator> DoBuildOperator(const TString& storageId) = 0; + THashMap<TString, std::shared_ptr<IBlobsStorageOperator>> Constructed; + std::shared_ptr<IBlobsStorageOperator> BuildOperator(const TString& storageId) { + auto result = DoBuildOperator(storageId); + Y_VERIFY(result); + return result; + } + + virtual void InitializeNecessaryStorages(); +public: + static const inline TString DefaultStorageId = "__DEFAULT"; + virtual ~IStoragesManager() = default; + + IStoragesManager() = default; + + std::shared_ptr<IBlobsStorageOperator> GetDefaultOperator() { + return GetOperator(DefaultStorageId); + } + + std::shared_ptr<IBlobsStorageOperator> GetInsertOperator() { + return GetDefaultOperator(); + } + + const THashMap<TString, std::shared_ptr<IBlobsStorageOperator>>& GetStorages() { + InitializeNecessaryStorages(); + return Constructed; + } + + void OnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& tiers); + + std::shared_ptr<IBlobsStorageOperator> GetOperator(const TString& storageIdExt); + std::shared_ptr<IBlobsStorageOperator> InitializePortionOperator(const TPortionInfo& portionInfo); +}; + + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp new file mode 100644 index 00000000000..65d05cee02c --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp @@ -0,0 +1,28 @@ +#include "write.h" +#include <library/cpp/actors/core/log.h> + +namespace NKikimr::NOlap { + +TUnifiedBlobId IBlobsWritingAction::AddDataForWrite(const TString& data) { + Y_VERIFY(!WritingStarted); + auto blobId = AllocateNextBlobId(data); + AFL_VERIFY(BlobsForWrite.emplace(blobId, data).second); + SumSize += data.size(); + return blobId; +} + +void IBlobsWritingAction::OnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) { + Y_VERIFY(BlobsForWrite.erase(blobId)); + return DoOnBlobWriteResult(blobId, status); +} + +bool IBlobsWritingAction::IsReady() const { + Y_VERIFY(WritingStarted); + return BlobsForWrite.empty(); +} + +IBlobsWritingAction::~IBlobsWritingAction() { + AFL_VERIFY(BlobsForWrite.empty() || Aborted); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract.h b/ydb/core/tx/columnshard/blobs_action/abstract/write.h index 49f7409b84d..a90479fa97b 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/write.h @@ -1,50 +1,64 @@ #pragma once -#include "blob_manager_db.h" - +#include "common.h" +#include <util/generic/hash.h> #include <ydb/core/protos/base.pb.h> #include <ydb/core/tx/columnshard/blob.h> -#include <ydb/core/tx/columnshard/counters/blobs_manager.h> - -#include <ydb/core/tablet_flat/flat_executor.h> -#include <ydb/core/util/backoff.h> -#include <ydb/core/protos/tx_columnshard.pb.h> -#include <util/generic/string.h> +namespace NKikimr::NColumnShard { +class TColumnShard; +class TBlobManagerDb; +} namespace NKikimr::NOlap { -using NOlap::TUnifiedBlobId; -using NOlap::TBlobRange; -using NOlap::TEvictedBlob; -using NOlap::EEvictState; -using NKikimrTxColumnShard::TEvictMetadata; - -class IBlobsAction { +class IBlobsWritingAction: public ICommonBlobsAction { +private: + using TBase = ICommonBlobsAction; + bool WritingStarted = false; + ui64 SumSize = 0; + THashMap<TUnifiedBlobId, TString> BlobsForWrite; + bool Aborted = false; protected: virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) = 0; virtual void DoOnCompleteTxBeforeWrite(NColumnShard::TColumnShard& self) = 0; virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) = 0; - virtual void DoOnBlobWriteResult(const TLogoBlobID& blobId, const NKikimrProto::EReplyStatus status) = 0; + virtual void DoOnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) = 0; - virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) = 0; + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) = 0; virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& self) = 0; -public: - virtual ~IBlobsAction() = default; - virtual bool IsReady() const = 0; virtual TUnifiedBlobId AllocateNextBlobId(const TString& data) = 0; +public: + IBlobsWritingAction(const TString& storageId) + : TBase(storageId) + { - void OnBlobWriteResult(const TLogoBlobID& blobId, const NKikimrProto::EReplyStatus status) { - return DoOnBlobWriteResult(blobId, status); } + virtual ~IBlobsWritingAction(); + bool IsReady() const; + + const THashMap<TUnifiedBlobId, TString>& GetBlobsForWrite() const { + return BlobsForWrite; + } + + void Abort() { + Aborted = true; + } + TUnifiedBlobId AddDataForWrite(const TString& data); + + void OnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status); void OnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) { return DoOnExecuteTxBeforeWrite(self, dbBlobs); } - virtual ui32 GetBlobsCount() const = 0; - virtual ui32 GetTotalSize() const = 0; + ui32 GetBlobsCount() const { + return BlobsForWrite.size(); + } + ui32 GetTotalSize() const { + return SumSize; + } virtual bool NeedDraftTransaction() const = 0; @@ -52,8 +66,8 @@ public: return DoOnCompleteTxBeforeWrite(self); } - void OnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) { - return DoOnExecuteTxAfterWrite(self, dbBlobs); + void OnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) { + return DoOnExecuteTxAfterWrite(self, dbBlobs, success); } void OnCompleteTxAfterWrite(NColumnShard::TColumnShard& self) { @@ -61,6 +75,7 @@ public: } void SendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) { + WritingStarted = true; return DoSendWriteBlobRequest(data, blobId); } }; diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/ya.make b/ydb/core/tx/columnshard/blobs_action/abstract/ya.make new file mode 100644 index 00000000000..2f0b074a602 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/ya.make @@ -0,0 +1,21 @@ +LIBRARY() + +SRCS( + gc.cpp + common.cpp + read.cpp + write.cpp + remove.cpp + storage.cpp + action.cpp + storages_manager.cpp +) + +PEERDIR( + ydb/core/protos + contrib/libs/apache/arrow + ydb/core/tablet_flat + ydb/core/tx/tiering +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp index 2920654f511..58e2a41ab0d 100644 --- a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp +++ b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp @@ -22,7 +22,7 @@ void TBlobManagerDb::SaveLastGcBarrier(const TGenStep& lastCollectedGenStep) { Schema::SaveSpecialValue(db, Schema::EValueIds::LastGcBarrierStep, std::get<1>(lastCollectedGenStep)); } -bool TBlobManagerDb::LoadLists(std::vector<TUnifiedBlobId>& blobsToKeep, std::vector<TUnifiedBlobId>& blobsToDelete, +bool TBlobManagerDb::LoadLists(std::vector<NOlap::TUnifiedBlobId>& blobsToKeep, std::vector<NOlap::TUnifiedBlobId>& blobsToDelete, const NOlap::IBlobGroupSelector* dsGroupSelector) { blobsToKeep.clear(); @@ -39,7 +39,7 @@ bool TBlobManagerDb::LoadLists(std::vector<TUnifiedBlobId>& blobsToKeep, std::ve while (!rowset.EndOfSet()) { const TString blobIdStr = rowset.GetValue<Schema::BlobsToKeep::BlobId>(); - TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); + NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); Y_VERIFY(unifiedBlobId.IsValid(), "%s", error.c_str()); blobsToKeep.push_back(unifiedBlobId); @@ -57,7 +57,7 @@ bool TBlobManagerDb::LoadLists(std::vector<TUnifiedBlobId>& blobsToKeep, std::ve while (!rowset.EndOfSet()) { const TString blobIdStr = rowset.GetValue<Schema::BlobsToDelete::BlobId>(); - TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); + NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); Y_VERIFY(unifiedBlobId.IsValid(), "%s", error.c_str()); blobsToDelete.push_back(unifiedBlobId); if (!rowset.Next()) @@ -68,150 +68,91 @@ bool TBlobManagerDb::LoadLists(std::vector<TUnifiedBlobId>& blobsToKeep, std::ve return true; } -void TBlobManagerDb::AddBlobToKeep(const TUnifiedBlobId& blobId) { +void TBlobManagerDb::AddBlobToKeep(const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table<Schema::BlobsToKeep>().Key(blobId.ToStringLegacy()).Update(); } -void TBlobManagerDb::EraseBlobToKeep(const TUnifiedBlobId& blobId) { +void TBlobManagerDb::EraseBlobToKeep(const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table<Schema::BlobsToKeep>().Key(blobId.ToStringLegacy()).Delete(); db.Table<Schema::BlobsToKeep>().Key(blobId.ToStringNew()).Delete(); } -void TBlobManagerDb::AddBlobToDelete(const TUnifiedBlobId& blobId) { +void TBlobManagerDb::AddBlobToDelete(const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table<Schema::BlobsToDelete>().Key(blobId.ToStringLegacy()).Update(); } -void TBlobManagerDb::EraseBlobToDelete(const TUnifiedBlobId& blobId) { +void TBlobManagerDb::EraseBlobToDelete(const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table<Schema::BlobsToDelete>().Key(blobId.ToStringLegacy()).Delete(); db.Table<Schema::BlobsToDelete>().Key(blobId.ToStringNew()).Delete(); } -void TBlobManagerDb::WriteSmallBlob(const TUnifiedBlobId& blobId, const TString& data) { - Y_VERIFY(blobId.IsSmallBlob()); - NIceDb::TNiceDb db(Database); - db.Table<Schema::SmallBlobs>().Key(blobId.ToStringNew()).Update( - NIceDb::TUpdate<Schema::SmallBlobs::Data>(data) - ); -} - -void TBlobManagerDb::EraseSmallBlob(const TUnifiedBlobId& blobId) { - Y_VERIFY(blobId.IsSmallBlob()); - NIceDb::TNiceDb db(Database); - db.Table<Schema::SmallBlobs>().Key(blobId.ToStringLegacy()).Delete(); - db.Table<Schema::SmallBlobs>().Key(blobId.ToStringNew()).Delete(); -} - -bool TBlobManagerDb::LoadEvicted(THashMap<TEvictedBlob, TString>& evicted, THashMap<TEvictedBlob, TString>& dropped, - const NOlap::IBlobGroupSelector& dsGroupSelector) { - evicted.clear(); - dropped.clear(); +bool TBlobManagerDb::LoadTierLists(const TString& storageId, std::deque<NOlap::TUnifiedBlobId>& blobsToDelete, std::deque<NOlap::TUnifiedBlobId>& draftBlobsToDelete) { + draftBlobsToDelete.clear(); + blobsToDelete.clear(); NIceDb::TNiceDb db(Database); - auto rowset = db.Table<Schema::OneToOneEvictedBlobs>().Select(); - if (!rowset.IsReady()) { - return false; - } + { + auto rowset = db.Table<Schema::TierBlobsToDelete>().Prefix(storageId).Select(); + if (!rowset.IsReady()) + return false; - TString error; + TString error; - while (!rowset.EndOfSet()) { - TString strBlobId = rowset.GetValue<Schema::OneToOneEvictedBlobs::BlobId>(); - //ui64 size = rowset.GetValue<Schema::OneToOneEvictedBlobs::Size>(); - EEvictState state = (EEvictState)rowset.GetValue<Schema::OneToOneEvictedBlobs::State>(); - bool isDropped = rowset.GetValue<Schema::OneToOneEvictedBlobs::Dropped>(); - TString meta = rowset.GetValue<Schema::OneToOneEvictedBlobs::Metadata>(); - TString strExternId = rowset.GetValue<Schema::OneToOneEvictedBlobs::ExternBlobId>(); - // TODO: CachedBlob + while (!rowset.EndOfSet()) { + const TString blobIdStr = rowset.GetValue<Schema::TierBlobsToDelete::BlobId>(); + NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); + Y_VERIFY(unifiedBlobId.IsValid(), "%s", error.c_str()); - Y_VERIFY(state != EEvictState::UNKNOWN); + blobsToDelete.emplace_back(std::move(unifiedBlobId)); + if (!rowset.Next()) + return false; + } + } - TUnifiedBlobId blobId = TUnifiedBlobId::ParseFromString(strBlobId, &dsGroupSelector, error); - Y_VERIFY(blobId.IsValid(), "%s", error.c_str()); + { + auto rowset = db.Table<Schema::TierBlobsDraft>().Prefix(storageId).Select(); + if (!rowset.IsReady()) + return false; - TUnifiedBlobId externId = TUnifiedBlobId::ParseFromString(strExternId, nullptr, error); - if (NOlap::IsExported(state)) { - Y_VERIFY(externId.IsValid(), "%s", error.c_str()); - } + TString error; - TEvictedBlob evict{ - .State = state, - .Blob = std::move(blobId), - .ExternBlob = std::move(externId), - }; + while (!rowset.EndOfSet()) { + const TString blobIdStr = rowset.GetValue<Schema::TierBlobsDraft::BlobId>(); + NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); + Y_VERIFY(unifiedBlobId.IsValid(), "%s", error.c_str()); - if (isDropped) { - dropped.emplace(std::move(evict), std::move(meta)); - } else { - evicted.emplace(std::move(evict), std::move(meta)); + draftBlobsToDelete.emplace_back(std::move(unifiedBlobId)); + if (!rowset.Next()) + return false; } - - if (!rowset.Next()) - return false; } return true; } -void TBlobManagerDb::UpdateEvictBlob(const TEvictedBlob& evict, const TString& meta) { +void TBlobManagerDb::AddTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); - - TString serializedBlobId = evict.Blob.ToStringNew(); - - switch (evict.State) { - case EEvictState::EVICTING: { - Y_VERIFY(!meta.empty()); - Y_VERIFY(evict.ExternBlob.IsS3Blob()); - TString serializedExternId = evict.ExternBlob.ToStringNew(); - - db.Table<Schema::OneToOneEvictedBlobs>().Key(serializedBlobId).Update( - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::Size>(evict.Blob.BlobSize()), - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::State>((ui8)evict.State), - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::Metadata>(meta), - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::ExternBlobId>(serializedExternId) - ); - break; - } - case EEvictState::SELF_CACHED: - case EEvictState::EXTERN: { - Y_VERIFY(meta.empty()); - Y_VERIFY(evict.ExternBlob.IsS3Blob()); - db.Table<Schema::OneToOneEvictedBlobs>().Key(serializedBlobId).Update( - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::State>((ui8)evict.State) - ); - break; - } - case EEvictState::ERASING: - Y_VERIFY(meta.empty()); - db.Table<Schema::OneToOneEvictedBlobs>().Key(serializedBlobId).Update( - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::State>((ui8)evict.State) - ); - break; - case EEvictState::UNKNOWN: - case EEvictState::CACHED: - Y_VERIFY(false); - break; - } + db.Table<Schema::TierBlobsToDelete>().Key(storageId, blobId.ToStringNew()).Update(); } -void TBlobManagerDb::DropEvictBlob(const TEvictedBlob& evict) { +void TBlobManagerDb::RemoveTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); - - TString serializedBlobId = evict.Blob.ToStringNew(); - db.Table<Schema::OneToOneEvictedBlobs>().Key(serializedBlobId).Update( - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::State>((ui8)evict.State), - NIceDb::TUpdate<Schema::OneToOneEvictedBlobs::Dropped>(true)); + db.Table<Schema::TierBlobsToDelete>().Key(storageId, blobId.ToStringNew()).Delete(); } -void TBlobManagerDb::EraseEvictBlob(const TEvictedBlob& evict) { +void TBlobManagerDb::AddTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); + db.Table<Schema::TierBlobsDraft>().Key(storageId, blobId.ToStringNew()).Update(); +} - TString serializedBlobId = evict.Blob.ToStringNew(); - db.Table<Schema::OneToOneEvictedBlobs>().Key(serializedBlobId).Delete(); +void TBlobManagerDb::RemoveTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { + NIceDb::TNiceDb db(Database); + db.Table<Schema::TierBlobsDraft>().Key(storageId, blobId.ToStringNew()).Delete(); } } diff --git a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h index 2c807387c1b..bdfc4ddd12f 100644 --- a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h +++ b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h @@ -1,6 +1,7 @@ #pragma once #include <ydb/core/tx/columnshard/defs.h> -#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tablet_flat/flat_database.h> +#include <ydb/core/tx/columnshard/blob.h> namespace NKikimr::NTable { class TDatabase; @@ -8,6 +9,9 @@ class TDatabase; namespace NKikimr::NColumnShard { +// Garbage Collection generation and step +using TGenStep = std::tuple<ui32, ui32>; + class IBlobManagerDb { public: virtual ~IBlobManagerDb() = default; @@ -15,21 +19,19 @@ public: virtual bool LoadLastGcBarrier(TGenStep& lastCollectedGenStep) = 0; virtual void SaveLastGcBarrier(const TGenStep& lastCollectedGenStep) = 0; - virtual bool LoadLists(std::vector<TUnifiedBlobId>& blobsToKeep, std::vector<TUnifiedBlobId>& blobsToDelete, + virtual bool LoadLists(std::vector<NOlap::TUnifiedBlobId>& blobsToKeep, std::vector<NOlap::TUnifiedBlobId>& blobsToDelete, const NOlap::IBlobGroupSelector* dsGroupSelector) = 0; - virtual void AddBlobToKeep(const TUnifiedBlobId& blobId) = 0; - virtual void EraseBlobToKeep(const TUnifiedBlobId& blobId) = 0; - virtual void AddBlobToDelete(const TUnifiedBlobId& blobId) = 0; - virtual void EraseBlobToDelete(const TUnifiedBlobId& blobId) = 0; - virtual void WriteSmallBlob(const TUnifiedBlobId& blobId, const TString& data) = 0; - virtual void EraseSmallBlob(const TUnifiedBlobId& blobId) = 0; - - virtual bool LoadEvicted(THashMap<TEvictedBlob, TString>& evicted, - THashMap<TEvictedBlob, TString>& dropped, - const NOlap::IBlobGroupSelector& dsGroupSelector) = 0; - virtual void UpdateEvictBlob(const TEvictedBlob& evict, const TString& meta) = 0; - virtual void DropEvictBlob(const TEvictedBlob& evict) = 0; - virtual void EraseEvictBlob(const TEvictedBlob& evict) = 0; + virtual void AddBlobToKeep(const NOlap::TUnifiedBlobId& blobId) = 0; + virtual void EraseBlobToKeep(const NOlap::TUnifiedBlobId& blobId) = 0; + virtual void AddBlobToDelete(const NOlap::TUnifiedBlobId& blobId) = 0; + virtual void EraseBlobToDelete(const NOlap::TUnifiedBlobId& blobId) = 0; + + virtual bool LoadTierLists(const TString& storageId, std::deque<NOlap::TUnifiedBlobId>& blobsToDelete, std::deque<NOlap::TUnifiedBlobId>& draftBlobsToDelete) = 0; + + virtual void AddTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; + virtual void RemoveTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; + virtual void AddTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; + virtual void RemoveTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; }; @@ -42,21 +44,22 @@ public: bool LoadLastGcBarrier(TGenStep& lastCollectedGenStep) override; void SaveLastGcBarrier(const TGenStep& lastCollectedGenStep) override; - bool LoadLists(std::vector<TUnifiedBlobId>& blobsToKeep, std::vector<TUnifiedBlobId>& blobsToDelete, + bool LoadLists(std::vector<NOlap::TUnifiedBlobId>& blobsToKeep, std::vector<NOlap::TUnifiedBlobId>& blobsToDelete, const NOlap::IBlobGroupSelector* dsGroupSelector) override; - void AddBlobToKeep(const TUnifiedBlobId& blobId) override; - void EraseBlobToKeep(const TUnifiedBlobId& blobId) override; - void AddBlobToDelete(const TUnifiedBlobId& blobId) override; - void EraseBlobToDelete(const TUnifiedBlobId& blobId) override; - void WriteSmallBlob(const TUnifiedBlobId& blobId, const TString& data) override; - void EraseSmallBlob(const TUnifiedBlobId& blobId) override; - - virtual bool LoadEvicted(THashMap<TEvictedBlob, TString>& evicted, - THashMap<TEvictedBlob, TString>& dropped, - const NOlap::IBlobGroupSelector& dsGroupSelector) override; - void UpdateEvictBlob(const TEvictedBlob& evict, const TString& meta) override; - void DropEvictBlob(const TEvictedBlob& evict) override; - void EraseEvictBlob(const TEvictedBlob& evict) override; + + void AddBlobToKeep(const NOlap::TUnifiedBlobId& blobId) override; + void EraseBlobToKeep(const NOlap::TUnifiedBlobId& blobId) override; + void AddBlobToDelete(const NOlap::TUnifiedBlobId& blobId) override; + void EraseBlobToDelete(const NOlap::TUnifiedBlobId& blobId) override; + + bool LoadTierLists(const TString& storageId, std::deque<NOlap::TUnifiedBlobId>& blobsToDelete, std::deque<NOlap::TUnifiedBlobId>& draftBlobsToDelete) override; + + void AddTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; + + void RemoveTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; + + void AddTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; + void RemoveTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; private: NTable::TDatabase& Database; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..a21f1b6bd2d --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-bs) +target_link_libraries(columnshard-blobs_action-bs PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-bs PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..faddb400160 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.linux-aarch64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-bs) +target_link_libraries(columnshard-blobs_action-bs PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-bs PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..faddb400160 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.linux-x86_64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-bs) +target_link_libraries(columnshard-blobs_action-bs PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-bs PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.txt b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.txt new file mode 100644 index 00000000000..f8b31df0c11 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..a21f1b6bd2d --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/CMakeLists.windows-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-bs) +target_link_libraries(columnshard-blobs_action-bs PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-bs PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp new file mode 100644 index 00000000000..629dbcdef48 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp @@ -0,0 +1,89 @@ +#include "gc.h" +#include "storage.h" +#include <ydb/core/tx/columnshard/columnshard_private_events.h> +#include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/tx/columnshard/blob_manager.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +void TGCTask::DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs) { + size_t numBlobs = 0; + + for (; KeepsToErase.size() && numBlobs < NColumnShard::TLimits::MAX_BLOBS_TO_DELETE; ++numBlobs) { + dbBlobs.EraseBlobToKeep(KeepsToErase.front()); + KeepsToErase.pop_front(); + } + + for (; DeletesToErase.size() && numBlobs < NColumnShard::TLimits::MAX_BLOBS_TO_DELETE; ++numBlobs) { + dbBlobs.EraseBlobToDelete(DeletesToErase.front()); + DeletesToErase.pop_front(); + } + if (KeepsToErase.empty() && DeletesToErase.empty()) { + Manager->OnGCFinished(CollectGenStepInFlight, dbBlobs); + } +} + +void TGCTask::DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr<IBlobsGCAction>& taskAction) { + if (KeepsToErase.size() || DeletesToErase.size()) { + TActorContext::AsActorContext().Send(self.SelfId(), std::make_unique<NColumnShard::TEvPrivate::TEvGarbageCollectionFinished>(taskAction)); + } +} + +TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const NColumnShard::TGenStep& collectGenStepInFlight, std::deque<TUnifiedBlobId>&& keepsToErase, std::deque<TUnifiedBlobId>&& deletesToErase, + const std::shared_ptr<NColumnShard::TBlobManager>& manager) + : TBase(storageId) + , ListsByGroupId(std::move(listsByGroupId)) + , CollectGenStepInFlight(collectGenStepInFlight) + , KeepsToErase(std::move(keepsToErase)) + , DeletesToErase(std::move(deletesToErase)) + , Manager(manager) +{ +} + +void TGCTask::OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev) { + Y_VERIFY(ev->Get()->Status == NKikimrProto::OK, "The caller must handle unsuccessful status"); + + // Find the group for this result + ui64 counterFromRequest = ev->Get()->PerGenerationCounter; + auto itCounter = CounterToGroupInFlight.find(counterFromRequest); + Y_VERIFY(itCounter != CounterToGroupInFlight.end()); + const ui32 group = itCounter->second; + + auto itGroup = ListsByGroupId.find(group); + Y_VERIFY(itGroup != ListsByGroupId.end()); + const auto& keepList = itGroup->second.KeepList; + const auto& dontKeepList = itGroup->second.DontKeepList; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "OnGCResult")("keep_list", keepList.size())("dont_keep_list", dontKeepList.size()); + + for (const auto& blobId : keepList) { + KeepsToErase.emplace_back(TUnifiedBlobId(group, blobId)); + } + for (const auto& blobId : dontKeepList) { + DeletesToErase.emplace_back(TUnifiedBlobId(group, blobId)); + } + + ListsByGroupId.erase(itGroup); + CounterToGroupInFlight.erase(itCounter); +} + +THashMap<ui32, std::unique_ptr<NKikimr::TEvBlobStorage::TEvCollectGarbage>> TGCTask::BuildRequests(ui64& perGenerationCounter, const ui64 tabletId, const ui64 currentGen) { + const ui32 channelIdx = NColumnShard::IBlobManager::BLOB_CHANNEL; + // Make per group requests + THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> requests; + for (const auto& gl : ListsByGroupId) { + ui32 group = gl.first; + requests[group] = std::make_unique<TEvBlobStorage::TEvCollectGarbage>( + tabletId, currentGen, perGenerationCounter, + channelIdx, true, + std::get<0>(CollectGenStepInFlight), std::get<1>(CollectGenStepInFlight), + new TVector<TLogoBlobID>(gl.second.KeepList.begin(), gl.second.KeepList.end()), + new TVector<TLogoBlobID>(gl.second.DontKeepList.begin(), gl.second.DontKeepList.end()), + TInstant::Max(), true); + + Y_VERIFY(CounterToGroupInFlight.emplace(perGenerationCounter, group).second); + perGenerationCounter += requests[group]->PerGenerationCounterStepSize(); + } + return std::move(requests); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.h b/ydb/core/tx/columnshard/blobs_action/bs/gc.h new file mode 100644 index 00000000000..054528db24f --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.h @@ -0,0 +1,46 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blob_cache.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/gc.h> +#include <ydb/core/tx/columnshard/blob_manager.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +class TGCTask: public IBlobsGCAction { +private: + using TBase = IBlobsGCAction; +public: + struct TGCLists { + THashSet<TLogoBlobID> KeepList; + THashSet<TLogoBlobID> DontKeepList; + }; + using TGCListsByGroup = THashMap<ui32, TGCLists>; +private: + TGCListsByGroup ListsByGroupId; + NColumnShard::TGenStep CollectGenStepInFlight; + // Maps PerGenerationCounter value to the group in PerGroupGCListsInFlight + THashMap<ui64, ui32> CounterToGroupInFlight; + std::deque<TUnifiedBlobId> KeepsToErase; + std::deque<TUnifiedBlobId> DeletesToErase; + std::shared_ptr<NColumnShard::TBlobManager> Manager; +protected: + virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr<IBlobsGCAction>& taskAction) override; +public: + bool IsEmpty() const { + return ListsByGroupId.empty(); + } + + TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const NColumnShard::TGenStep& collectGenStepInFlight, std::deque<TUnifiedBlobId>&& keepsToErase, std::deque<TUnifiedBlobId>&& deletesToErase, + const std::shared_ptr<NColumnShard::TBlobManager>& manager); + + bool IsFinished() const { + return ListsByGroupId.empty(); + } + + void OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev); + + THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> BuildRequests(ui64& perGenerationCounter, const ui64 tabletId, const ui64 currentGen); +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp new file mode 100644 index 00000000000..40f8f4936a3 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp @@ -0,0 +1,15 @@ +#include "gc_actor.h" +#include <ydb/core/tx/columnshard/columnshard_private_events.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +void TGarbageCollectionActor::Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr& ev) { + GCTask->OnGCResult(ev); + if (GCTask->IsFinished()) { + auto g = PassAwayGuard(); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "finished"); + TActorContext::AsActorContext().Send(TabletActorId, std::make_unique<NColumnShard::TEvPrivate::TEvGarbageCollectionFinished>(GCTask)); + } +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h new file mode 100644 index 00000000000..7aad02a6293 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h @@ -0,0 +1,40 @@ +#pragma once +#include "gc.h" + +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> +#include <ydb/core/base/blobstorage.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +class TGarbageCollectionActor: public TActorBootstrapped<TGarbageCollectionActor> { +private: + const NActors::TActorId TabletActorId; + THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> Requests; + std::shared_ptr<TGCTask> GCTask; + void Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr& ev); +public: + TGarbageCollectionActor(const std::shared_ptr<TGCTask>& task, THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>>&& requests, const NActors::TActorId& tabletActorId) + : TabletActorId(tabletActorId) + , Requests(std::move(requests)) + , GCTask(task) + { + + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvBlobStorage::TEvCollectGarbageResult, Handle); + } + } + + void Bootstrap(const TActorContext& ctx) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "starting"); + for (auto&& i : Requests) { + SendToBSProxy(ctx, i.first, i.second.release()); + } + Become(&TGarbageCollectionActor::StateWork); + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/read.cpp b/ydb/core/tx/columnshard/blobs_action/bs/read.cpp new file mode 100644 index 00000000000..89852a31f4d --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/read.cpp @@ -0,0 +1,5 @@ +#include "read.h" + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/read.h b/ydb/core/tx/columnshard/blobs_action/bs/read.h new file mode 100644 index 00000000000..870c01f839b --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/read.h @@ -0,0 +1,31 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/read.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +class TReadingAction: public IBlobsReadingAction { +private: + using TBase = IBlobsReadingAction; + const TActorId BlobCacheActorId; +protected: + virtual void DoStartReading(const THashMap<TUnifiedBlobId, THashSet<TBlobRange>>& ranges) override { + for (auto&& i : ranges) { + NBlobCache::TReadBlobRangeOptions readOpts{.CacheAfterRead = true, .ForceFallback = false, .IsBackgroud = false, .WithDeadline = true}; + std::vector<TBlobRange> rangesLocal(i.second.begin(), i.second.end()); + TActorContext::AsActorContext().Send(BlobCacheActorId, new NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch(std::move(rangesLocal), std::move(readOpts))); + } + } +public: + + TReadingAction(const TString& storageId, const TActorId& blobCacheActorId) + : TBase(storageId) + , BlobCacheActorId(blobCacheActorId) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/remove.h b/ydb/core/tx/columnshard/blobs_action/bs/remove.h new file mode 100644 index 00000000000..eb35653aa88 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/remove.h @@ -0,0 +1,35 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/remove.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +class TDeclareRemovingAction: public IBlobsDeclareRemovingAction { +private: + using TBase = IBlobsDeclareRemovingAction; + NColumnShard::TBlobManager* Manager; +protected: + virtual void DoDeclareRemove(const TUnifiedBlobId& /*blobId*/) { + + } + + virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs, const bool /*success*/) { + for (auto&& i : GetDeclaredBlobs()) { + Manager->DeleteBlob(i, dbBlobs); + } + } + virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/) { + + } +public: + TDeclareRemovingAction(const TString& storageId, NColumnShard::TBlobManager& manager) + : TBase(storageId) + , Manager(&manager) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp b/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp new file mode 100644 index 00000000000..b2ee8efcbf9 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp @@ -0,0 +1,44 @@ +#include "storage.h" +#include "remove.h" +#include "write.h" +#include "read.h" +#include "gc.h" +#include "gc_actor.h" +#include <ydb/core/tx/columnshard/columnshard_impl.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +std::shared_ptr<NKikimr::NOlap::IBlobsDeclareRemovingAction> TOperator::DoStartDeclareRemovingAction() { + return std::make_shared<TDeclareRemovingAction>(GetStorageId(), *Manager); +} + +std::shared_ptr<NKikimr::NOlap::IBlobsWritingAction> TOperator::DoStartWritingAction() { + return std::make_shared<TWriteAction>(GetStorageId(), Manager); +} + +std::shared_ptr<NKikimr::NOlap::IBlobsReadingAction> TOperator::DoStartReadingAction() { + return std::make_shared<TReadingAction>(GetStorageId(), BlobCacheActorId); +} + +bool TOperator::DoStartGC() { + auto gcTask = Manager->BuildGCTask(GetStorageId(), Manager); + if (!gcTask || gcTask->IsEmpty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartGCSkipped"); + return false; + } + auto requests = gcTask->BuildRequests(PerGenerationCounter, Manager->GetTabletId(), Manager->GetCurrentGen()); + AFL_VERIFY(requests.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartGC")("requests_count", requests.size()); + TActorContext::AsActorContext().Register(new TGarbageCollectionActor(gcTask, std::move(requests), TabletActorId)); + return true; +} + +TOperator::TOperator(const TString& storageId, const NActors::TActorId& tabletActorId, const TIntrusivePtr<TTabletStorageInfo>& tabletInfo, const ui64 generation) + : TBase(storageId) + , Manager(std::make_shared<NColumnShard::TBlobManager>(tabletInfo, generation)) + , BlobCacheActorId(NBlobCache::MakeBlobCacheServiceId()) + , TabletActorId(tabletActorId) +{ +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs/storage.h b/ydb/core/tx/columnshard/blobs_action/bs/storage.h new file mode 100644 index 00000000000..df8b08f3640 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/storage.h @@ -0,0 +1,35 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +class TOperator: public IBlobsStorageOperator { +private: + using TBase = IBlobsStorageOperator; + std::shared_ptr<NColumnShard::TBlobManager> Manager; + const TActorId BlobCacheActorId; + ui64 PerGenerationCounter = 1; + const TActorId TabletActorId; +protected: + virtual std::shared_ptr<IBlobsDeclareRemovingAction> DoStartDeclareRemovingAction() override; + virtual std::shared_ptr<IBlobsWritingAction> DoStartWritingAction() override; + virtual std::shared_ptr<IBlobsReadingAction> DoStartReadingAction() override; + virtual bool DoStartGC() override; + virtual bool DoLoad(NColumnShard::IBlobManagerDb& dbBlobs) override { + return Manager->LoadState(dbBlobs); + } + virtual void DoOnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& /*tiers*/) override { + return; + } + +public: + TOperator(const TString& storageId, const NActors::TActorId& tabletActorId, const TIntrusivePtr<TTabletStorageInfo>& tabletInfo, const ui64 generation); + virtual std::shared_ptr<IBlobInUseTracker> GetBlobsTracker() const override { + return Manager; + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/bs.cpp b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp index ac995159555..38cd277ad16 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp @@ -1,16 +1,16 @@ -#include "bs.h" +#include "write.h" #include <ydb/core/tx/columnshard/columnshard_impl.h> -namespace NKikimr::NOlap { +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { -void TBSWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) { +void TWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool /*success*/) { ui64 blobsWritten = BlobBatch.GetBlobCount(); ui64 bytesWritten = BlobBatch.GetTotalSize(); self.IncCounter(NColumnShard::COUNTER_UPSERT_BLOBS_WRITTEN, blobsWritten); self.IncCounter(NColumnShard::COUNTER_UPSERT_BYTES_WRITTEN, bytesWritten); // self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_UPSERTED, insertedBytes); self.IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); - self.BlobManager->SaveBlobBatch(std::move(BlobBatch), dbBlobs); + Manager->SaveBlobBatch(std::move(BlobBatch), dbBlobs); } } diff --git a/ydb/core/tx/columnshard/blobs_action/bs.h b/ydb/core/tx/columnshard/blobs_action/bs/write.h index 6a53117a578..ac2b1588385 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/write.h @@ -1,20 +1,23 @@ #pragma once -#include "abstract.h" +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> #include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> -namespace NKikimr::NOlap { +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { -class TBSWriteAction: public IBlobsAction { +class TWriteAction: public IBlobsWritingAction { private: + using TBase = IBlobsWritingAction; NColumnShard::TBlobBatch BlobBatch; + std::shared_ptr<NColumnShard::IBlobManager> Manager; protected: virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) override { return BlobBatch.SendWriteBlobRequest(data, blobId, TInstant::Max(), TActorContext::AsActorContext()); } - virtual void DoOnBlobWriteResult(const TLogoBlobID& blobId, const NKikimrProto::EReplyStatus status) override { - return BlobBatch.OnBlobWriteResult(blobId, status); + virtual void DoOnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) override { + return BlobBatch.OnBlobWriteResult(blobId.GetLogoBlobId(), status); } virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/) override { @@ -25,17 +28,11 @@ protected: return; } - virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) override; virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/) override { } public: - virtual ui32 GetBlobsCount() const override { - return BlobBatch.GetBlobCount(); - } - virtual ui32 GetTotalSize() const override { - return BlobBatch.GetTotalSize(); - } virtual bool NeedDraftTransaction() const override { return false; } @@ -43,12 +40,11 @@ public: virtual TUnifiedBlobId AllocateNextBlobId(const TString& data) override { return BlobBatch.AllocateNextBlobId(data); } - virtual bool IsReady() const override { - return BlobBatch.AllBlobWritesCompleted(); - } - TBSWriteAction(NColumnShard::IBlobManager& blobManager) - : BlobBatch(blobManager.StartBlobBatch()) + TWriteAction(const TString& storageId, const std::shared_ptr<NColumnShard::IBlobManager>& manager) + : TBase(storageId) + , BlobBatch(manager->StartBlobBatch()) + , Manager(manager) { } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/ya.make b/ydb/core/tx/columnshard/blobs_action/bs/ya.make new file mode 100644 index 00000000000..20ce33ad81a --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/bs/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + gc.cpp + gc_actor.cpp + write.cpp + read.cpp + storage.cpp +) + +PEERDIR( + ydb/core/protos + contrib/libs/apache/arrow + ydb/core/tablet_flat + ydb/core/tx/tiering +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/abstract.cpp b/ydb/core/tx/columnshard/blobs_action/memory.cpp index 8b6b1715fcd..66ff21fc500 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract.cpp +++ b/ydb/core/tx/columnshard/blobs_action/memory.cpp @@ -1,4 +1,4 @@ -#include "abstract.h" +#include "memory.h" namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/blobs_action/memory.h b/ydb/core/tx/columnshard/blobs_action/memory.h new file mode 100644 index 00000000000..818d798c628 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/memory.h @@ -0,0 +1,173 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> + +namespace NKikimr::NOlap { + +class TMemoryStorage { +private: + THashMap<TUnifiedBlobId, TString> Data; + THashMap<TUnifiedBlobId, TString> DataWriting; + THashSet<TUnifiedBlobId> DataForRemove; + TMutex Mutex; +public: + std::optional<TString> Read(const TUnifiedBlobId& id) { + TGuard<TMutex> g(Mutex); + auto it = Data.find(id); + if (it == Data.end()) { + return {}; + } else { + return it->second; + } + } + + void DeclareDataForRemove(const TUnifiedBlobId& id) { + TGuard<TMutex> g(Mutex); + DataForRemove.emplace(id); + } + + void StartWriting(const TUnifiedBlobId& id, const TString& data) { + TGuard<TMutex> g(Mutex); + Y_VERIFY(DataWriting.emplace(id, data).second); + } + + void CommitWriting(const TUnifiedBlobId& id) { + TGuard<TMutex> g(Mutex); + auto it = DataWriting.find(id); + Y_VERIFY(it != DataWriting.end()); + Y_VERIFY(Data.emplace(id, it->second).second); + DataWriting.erase(it); + } + + TMemoryStorage() = default; +}; + +class TMemoryWriteAction: public IBlobsWritingAction { +private: + using TBase = IBlobsWritingAction; + const std::shared_ptr<TMemoryStorage> Storage; +protected: + virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) override { + Storage->StartWriting(blobId, data); + TActorContext::AsActorContext().Send(TActorContext::AsActorContext().SelfID, std::make_unique<TEvBlobStorage::TEvPutResult>( + NKikimrProto::EReplyStatus::OK, blobId.GetLogoBlobId(), TStorageStatusFlags(), 0, 0)); + } + + virtual void DoOnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) override { + Y_VERIFY(status == NKikimrProto::EReplyStatus::OK); + Storage->CommitWriting(blobId); + } + + virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/) override { + return; + } + + virtual void DoOnCompleteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/) override { + return; + } + + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/, const bool /*success*/) override { + + } + virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/) override { + + } +public: + virtual bool NeedDraftTransaction() const override { + return true; + } + + virtual TUnifiedBlobId AllocateNextBlobId(const TString& /*data*/) override { + return TUnifiedBlobId(); +// return BlobBatch.AllocateNextBlobId(data); + } + + TMemoryWriteAction(const TString& storageId, const std::shared_ptr<TMemoryStorage>& storage) + : TBase(storageId) + , Storage(storage) + { + + } +}; + +class TMemoryDeclareRemovingAction: public IBlobsDeclareRemovingAction { +private: + using TBase = IBlobsDeclareRemovingAction; + const std::shared_ptr<TMemoryStorage> Storage; +protected: + virtual void DoDeclareRemove(const TUnifiedBlobId& /*blobId*/) { + + } + + virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/, const bool /*success*/) { + for (auto&& i : GetDeclaredBlobs()) { + Storage->DeclareDataForRemove(i); + } + } + virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/) { + + } +public: + + TMemoryDeclareRemovingAction(const TString& storageId, const std::shared_ptr<TMemoryStorage>& storage) + : TBase(storageId) + , Storage(storage) { + + } +}; + +class TMemoryReadingAction: public IBlobsReadingAction { +private: + using TBase = IBlobsReadingAction; + const std::shared_ptr<TMemoryStorage> Storage; +protected: + virtual void DoStartReading(const THashMap<TUnifiedBlobId, THashSet<TBlobRange>>& ranges) override { + for (auto&& i : ranges) { + auto data = Storage->Read(i.first); + for (auto&& r : i.second) { + if (!data) { + TActorContext::AsActorContext().Send(TActorContext::AsActorContext().SelfID, + new NBlobCache::TEvBlobCache::TEvReadBlobRangeResult(r, NKikimrProto::EReplyStatus::NODATA, "")); + } else { + Y_VERIFY(r.Offset + r.Size <= data->size()); + TActorContext::AsActorContext().Send(TActorContext::AsActorContext().SelfID, + new NBlobCache::TEvBlobCache::TEvReadBlobRangeResult(r, NKikimrProto::EReplyStatus::OK, data->substr(r.Offset, r.Size))); + } + } + } + } +public: + + TMemoryReadingAction(const TString& storageId, const std::shared_ptr<TMemoryStorage>& storage) + : TBase(storageId) + , Storage(storage) + { + + } +}; + +class TMemoryOperator: public IBlobsStorageOperator { +private: + using TBase = IBlobsStorageOperator; + std::shared_ptr<TMemoryStorage> Storage; +protected: + virtual std::shared_ptr<IBlobsDeclareRemovingAction> DoStartDeclareRemovingAction() override { + return std::make_shared<TMemoryDeclareRemovingAction>(GetStorageId(), Storage); + } + virtual std::shared_ptr<IBlobsWritingAction> DoStartWritingAction() override { + return std::make_shared<TMemoryWriteAction>(GetStorageId(), Storage); + } + virtual std::shared_ptr<IBlobsReadingAction> DoStartReadingAction() override { + return std::make_shared<TMemoryReadingAction>(GetStorageId(), Storage); + } +public: + TMemoryOperator(const TString& storageId) + : TBase(storageId) + { + Storage = std::make_shared<TMemoryStorage>(); + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..692ee6f39e7 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-tier) +target_link_libraries(columnshard-blobs_action-tier PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-tier PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..1eb491bf6b3 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.linux-aarch64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-tier) +target_link_libraries(columnshard-blobs_action-tier PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-tier PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..1eb491bf6b3 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.linux-x86_64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-tier) +target_link_libraries(columnshard-blobs_action-tier PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-tier PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/gc_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/read.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.txt b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.txt new file mode 100644 index 00000000000..606ff46b4be --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp b/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp new file mode 100644 index 00000000000..14d550328cd --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp @@ -0,0 +1,37 @@ +#include "adapter.h" +#include <ydb/core/base/logoblob.h> +#include <ydb/core/base/blobstorage.h> +#include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/tx/columnshard/blob_cache.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +std::unique_ptr<NActors::IEventBase> TRepliesAdapter::RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvGetObjectResponse>&& ev) const { + TLogoBlobID logoBlobId; + TString error; + AFL_VERIFY(TLogoBlobID::Parse(logoBlobId, *ev->Key, error))("error", error)("str_blob_id", *ev->Key); + TBlobRange bRange(TUnifiedBlobId(Max<ui32>(), logoBlobId), ev->GetReadInterval().first, ev->GetReadIntervalLength()); + if (ev->IsSuccess()) { + AFL_VERIFY(ev->Body.size() == ev->GetReadIntervalLength())("body_size", ev->Body.size())("result", ev->GetReadIntervalLength()); + } + if (ev->IsSuccess()) { + AFL_VERIFY(!!ev->Body)("key", ev->Key)("interval_from", ev->GetReadInterval().first)("interval_to", ev->GetReadInterval().second); + return std::make_unique<NBlobCache::TEvBlobCache::TEvReadBlobRangeResult>(bRange, NKikimrProto::EReplyStatus::OK, ev->Body); + } else { + return std::make_unique<NBlobCache::TEvBlobCache::TEvReadBlobRangeResult>(bRange, NKikimrProto::EReplyStatus::ERROR, ""); + } +} + +std::unique_ptr<NActors::IEventBase> TRepliesAdapter::RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvPutObjectResponse>&& ev) const { + TLogoBlobID logoBlobId; + TString error; + Y_VERIFY(ev->Key); + AFL_VERIFY(TLogoBlobID::Parse(logoBlobId, *ev->Key, error))("error", error)("str_blob_id", *ev->Key); + if (ev->IsSuccess()) { + return std::make_unique<TEvBlobStorage::TEvPutResult>(NKikimrProto::EReplyStatus::OK, logoBlobId, 0, Max<ui32>(), 0); + } else { + return std::make_unique<TEvBlobStorage::TEvPutResult>(NKikimrProto::EReplyStatus::ERROR, logoBlobId, 0, Max<ui32>(), 0); + } +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/adapter.h b/ydb/core/tx/columnshard/blobs_action/tier/adapter.h new file mode 100644 index 00000000000..e696545a421 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/adapter.h @@ -0,0 +1,51 @@ +#pragma once +#include <ydb/core/wrappers/abstract.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TRepliesAdapter: public NWrappers::NExternalStorage::IReplyAdapter { +public: + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvGetObjectResponse>&& ev) const override; + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvPutObjectResponse>&& ev) const override; + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvListObjectsResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvHeadObjectResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvDeleteObjectResponse>&& ev) const override { + return std::move(ev); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvDeleteObjectsResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvCreateMultipartUploadResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvUploadPartResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvCompleteMultipartUploadResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvAbortMultipartUploadResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvCheckObjectExistsResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } + virtual std::unique_ptr<IEventBase> RebuildReplyEvent(std::unique_ptr<NWrappers::NExternalStorage::TEvUploadPartCopyResponse>&& ev) const override { + Y_UNUSED(ev); + Y_VERIFY(false); + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp b/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp new file mode 100644 index 00000000000..3c92504a3a4 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp @@ -0,0 +1,15 @@ +#include "gc.h" +#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +void TGCTask::DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs) { + for (auto&& i : DraftBlobIds) { + dbBlobs.RemoveTierDraftBlobId(GetStorageId(), i); + } + for (auto&& i : DeleteBlobIds) { + dbBlobs.RemoveTierBlobToDelete(GetStorageId(), i); + } +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc.h b/ydb/core/tx/columnshard/blobs_action/tier/gc.h new file mode 100644 index 00000000000..7bc5f8ef5fb --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc.h @@ -0,0 +1,32 @@ +#pragma once +#include <ydb/core/tx/columnshard/blobs_action/abstract/gc.h> +#include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/wrappers/abstract.h> +#include <ydb/library/accessor/accessor.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TGCTask: public IBlobsGCAction { +private: + using TBase = IBlobsGCAction; +private: + YDB_READONLY_DEF(std::vector<TUnifiedBlobId>, DraftBlobIds); + YDB_READONLY_DEF(std::vector<TUnifiedBlobId>, DeleteBlobIds); + YDB_READONLY_DEF(NWrappers::NExternalStorage::IExternalStorageOperator::TPtr, ExternalStorageOperator); +protected: + virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, const std::shared_ptr<IBlobsGCAction>& /*taskAction*/) override { + + } +public: + TGCTask(const TString& storageId, std::vector<TUnifiedBlobId>&& draftBlobIds, std::vector<TUnifiedBlobId>&& deleteBlobIds, + const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& externalStorageOperator) + : TBase(storageId) + , DraftBlobIds(std::move(draftBlobIds)) + , DeleteBlobIds(std::move(deleteBlobIds)) + , ExternalStorageOperator(externalStorageOperator) + { + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp new file mode 100644 index 00000000000..4df2e935db7 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp @@ -0,0 +1,37 @@ +#include "gc_actor.h" +#include <ydb/core/tx/columnshard/columnshard_private_events.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +void TGarbageCollectionActor::Handle(NWrappers::NExternalStorage::TEvDeleteObjectResponse::TPtr& ev) { + TLogoBlobID logoBlobId; + TString errorMessage; + Y_VERIFY(ev->Get()->Key); + AFL_VERIFY(TLogoBlobID::Parse(logoBlobId, *ev->Get()->Key, errorMessage))("error", errorMessage); + BlobIdsToRemove.erase(logoBlobId); + if (BlobIdsToRemove.empty()) { + auto g = PassAwayGuard(); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "finished"); + TActorContext::AsActorContext().Send(TabletActorId, std::make_unique<NColumnShard::TEvPrivate::TEvGarbageCollectionFinished>(GCTask)); + } +} + +void TGarbageCollectionActor::Bootstrap(const TActorContext& /*ctx*/) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "starting"); + for (auto&& i : GCTask->GetDraftBlobIds()) { + BlobIdsToRemove.emplace(i.GetLogoBlobId()); + } + for (auto&& i : GCTask->GetDeleteBlobIds()) { + BlobIdsToRemove.emplace(i.GetLogoBlobId()); + } + for (auto&& i : BlobIdsToRemove) { + auto awsRequest = Aws::S3::Model::DeleteObjectRequest().WithKey(i.ToString()); + auto request = std::make_unique<NWrappers::NExternalStorage::TEvDeleteObjectRequest>(awsRequest); + auto hRequest = std::make_unique<IEventHandle>(NActors::TActorId(), TActorContext::AsActorContext().SelfID, request.release()); + TAutoPtr<TEventHandle<NWrappers::NExternalStorage::TEvDeleteObjectRequest>> evPtr((TEventHandle<NWrappers::NExternalStorage::TEvDeleteObjectRequest>*)hRequest.release()); + GCTask->GetExternalStorageOperator()->Execute(evPtr); + } + Become(&TGarbageCollectionActor::StateWork); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h new file mode 100644 index 00000000000..448c0789965 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h @@ -0,0 +1,34 @@ +#pragma once +#include "gc.h" + +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> +#include <ydb/core/base/blobstorage.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TGarbageCollectionActor: public TActorBootstrapped<TGarbageCollectionActor> { +private: + const NActors::TActorId TabletActorId; + std::shared_ptr<TGCTask> GCTask; + + THashSet<TLogoBlobID> BlobIdsToRemove; + void Handle(NWrappers::NExternalStorage::TEvDeleteObjectResponse::TPtr& ev); +public: + TGarbageCollectionActor(const std::shared_ptr<TGCTask>& task, const NActors::TActorId& tabletActorId) + : TabletActorId(tabletActorId) + , GCTask(task) + { + + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(NWrappers::NExternalStorage::TEvDeleteObjectResponse, Handle); + } + } + + void Bootstrap(const TActorContext& ctx); +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_info.cpp b/ydb/core/tx/columnshard/blobs_action/tier/gc_info.cpp new file mode 100644 index 00000000000..a6a42214324 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_info.cpp @@ -0,0 +1,5 @@ +#include "gc_info.h" + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h b/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h new file mode 100644 index 00000000000..2d42b850a51 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h @@ -0,0 +1,36 @@ +#pragma once +#include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/library/accessor/accessor.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TGCInfo: public TCommonBlobsTracker { +private: + YDB_ACCESSOR_DEF(std::deque<TUnifiedBlobId>, BlobsToDelete); + YDB_ACCESSOR_DEF(std::deque<TUnifiedBlobId>, DraftBlobIdsToRemove); + YDB_ACCESSOR_DEF(THashSet<TUnifiedBlobId>, BlobsToDeleteInFuture); +public: + virtual void OnBlobFree(const TUnifiedBlobId& blobId) override { + if (BlobsToDeleteInFuture.erase(blobId)) { + BlobsToDelete.emplace_back(blobId); + } + } + + bool ExtractForGC(std::vector<TUnifiedBlobId>& deleteDraftBlobIds, std::vector<TUnifiedBlobId>& deleteBlobIds, const ui32 blobsCountLimit) { + if (DraftBlobIdsToRemove.empty() && BlobsToDelete.empty()) { + return false; + } + while (DraftBlobIdsToRemove.size() && deleteBlobIds.size() + deleteDraftBlobIds.size() < blobsCountLimit) { + deleteDraftBlobIds.emplace_back(DraftBlobIdsToRemove.front()); + DraftBlobIdsToRemove.pop_front(); + } + while (BlobsToDelete.size() && deleteBlobIds.size() + deleteDraftBlobIds.size() < blobsCountLimit) { + deleteBlobIds.emplace_back(BlobsToDelete.front()); + BlobsToDelete.pop_front(); + } + return true; + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/read.cpp b/ydb/core/tx/columnshard/blobs_action/tier/read.cpp new file mode 100644 index 00000000000..754dd289d73 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/read.cpp @@ -0,0 +1,19 @@ +#include "read.h" + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +void TReadingAction::DoStartReading(const THashMap<TUnifiedBlobId, THashSet<TBlobRange>>& ranges) { + for (auto&& i : ranges) { + for (auto&& r : i.second) { + auto awsRequest = Aws::S3::Model::GetObjectRequest() + .WithKey(i.first.GetLogoBlobId().ToString()) + .WithRange(TStringBuilder() << "bytes=" << r.Offset << "-" << r.Offset + r.Size - 1); + auto request = std::make_unique<NWrappers::NExternalStorage::TEvGetObjectRequest>(awsRequest); + auto hRequest = std::make_unique<IEventHandle>(NActors::TActorId(), TActorContext::AsActorContext().SelfID, request.release()); + TAutoPtr<TEventHandle<NWrappers::NExternalStorage::TEvGetObjectRequest>> evPtr((TEventHandle<NWrappers::NExternalStorage::TEvGetObjectRequest>*)hRequest.release()); + ExternalStorageOperator->Execute(evPtr); + } + } +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/read.h b/ydb/core/tx/columnshard/blobs_action/tier/read.h new file mode 100644 index 00000000000..b9804d18b66 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/read.h @@ -0,0 +1,23 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/read.h> +#include <ydb/core/wrappers/abstract.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TReadingAction: public IBlobsReadingAction { +private: + using TBase = IBlobsReadingAction; + const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr ExternalStorageOperator; +protected: + virtual void DoStartReading(const THashMap<TUnifiedBlobId, THashSet<TBlobRange>>& ranges) override; +public: + + TReadingAction(const TString& storageId, const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& storageOperator) + : TBase(storageId) + , ExternalStorageOperator(storageOperator) { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/remove.h b/ydb/core/tx/columnshard/blobs_action/tier/remove.h new file mode 100644 index 00000000000..97889674c94 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/remove.h @@ -0,0 +1,42 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/remove.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TDeclareRemovingAction: public IBlobsDeclareRemovingAction { +private: + using TBase = IBlobsDeclareRemovingAction; + std::shared_ptr<TGCInfo> GCInfo; +protected: + virtual void DoDeclareRemove(const TUnifiedBlobId& /*blobId*/) { + + } + + virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) { + if (success) { + for (auto&& i : GetDeclaredBlobs()) { + dbBlobs.AddTierBlobToDelete(GetStorageId(), i); + if (GCInfo->IsBlobInUsage(i)) { + Y_VERIFY(GCInfo->MutableBlobsToDeleteInFuture().emplace(i).second); + } else { + GCInfo->MutableBlobsToDelete().emplace_back(i); + } + } + } + } + virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/) { + + } +public: + TDeclareRemovingAction(const TString& storageId, const std::shared_ptr<TGCInfo>& gcInfo) + : TBase(storageId) + , GCInfo(gcInfo) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp b/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp new file mode 100644 index 00000000000..3d6dec7bd66 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp @@ -0,0 +1,73 @@ +#include "storage.h" +#include "remove.h" +#include "write.h" +#include "read.h" +#include "gc.h" +#include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/tx/tiering/manager.h> +#include "gc_actor.h" + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +NWrappers::NExternalStorage::IExternalStorageOperator::TPtr TOperator::GetCurrentOperator() const { + const ui32 idx = CurrentOperatorIdx.Val(); + AFL_VERIFY(idx < ExternalStorageOperators.size())("idx", idx)("size", ExternalStorageOperators.size()); + auto result = ExternalStorageOperators[idx]; + Y_VERIFY(result); + return result; +} + +std::shared_ptr<IBlobsDeclareRemovingAction> TOperator::DoStartDeclareRemovingAction() { + return std::make_shared<TDeclareRemovingAction>(GetStorageId(), GCInfo); +} + +std::shared_ptr<IBlobsWritingAction> TOperator::DoStartWritingAction() { + return std::make_shared<TWriteAction>(GetStorageId(), GetCurrentOperator(), TabletId, GCInfo); +} + +std::shared_ptr<IBlobsReadingAction> TOperator::DoStartReadingAction() { + return std::make_shared<TReadingAction>(GetStorageId(), GetCurrentOperator()); +} + +bool TOperator::DoStartGC() { + std::vector<TUnifiedBlobId> draftBlobIds; + std::vector<TUnifiedBlobId> deleteBlobIds; + if (!GCInfo->ExtractForGC(draftBlobIds, deleteBlobIds, 100000)) { + return false; + } + auto gcTask = std::make_shared<TGCTask>(GetStorageId(), std::move(draftBlobIds), std::move(deleteBlobIds), GetCurrentOperator()); + TActorContext::AsActorContext().Register(new TGarbageCollectionActor(gcTask, TabletActorId)); + return true; +} + +TOperator::TOperator(const TString& storageId, const NColumnShard::TColumnShard& shard, const std::shared_ptr<NWrappers::NExternalStorage::IExternalStorageOperator>& externalOperator) + : TBase(storageId) + , TabletId(shard.TabletID()) + , TabletActorId(shard.SelfId()) + , ExternalStorageOperators({externalOperator}) +{ + AFL_VERIFY(externalOperator); +} + +void TOperator::DoOnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& tiers) { + AFL_VERIFY(ExternalStorageOperators.size()); + auto* tierManager = tiers->GetManagerOptional(TBase::GetStorageId()); + ui32 cleanCount = ExternalStorageOperators.size() - 1; + if (tierManager) { + auto bOperator = tiers->GetManagerVerified(TBase::GetStorageId()).GetExternalStorageOperator(); + AFL_VERIFY(bOperator); + ExternalStorageOperators.emplace_back(bOperator); + CurrentOperatorIdx.Inc(); + } else { + cleanCount = ExternalStorageOperators.size(); + } + for (ui32 i = 0; i < cleanCount; ++i) { + if (ExternalStorageOperators[i].use_count() == 1) { + ExternalStorageOperators[i] = nullptr; + } else { + break; + } + } +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/storage.h b/ydb/core/tx/columnshard/blobs_action/tier/storage.h new file mode 100644 index 00000000000..8587a6f5e4e --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/storage.h @@ -0,0 +1,41 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> +#include <ydb/core/wrappers/abstract.h> +#include "gc_info.h" + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TOperator: public IBlobsStorageOperator { +private: + using TBase = IBlobsStorageOperator; + const ui64 TabletId; + const NActors::TActorId TabletActorId; + TAtomicCounter CurrentOperatorIdx = 0; + std::deque<NWrappers::NExternalStorage::IExternalStorageOperator::TPtr> ExternalStorageOperators; + std::shared_ptr<TGCInfo> GCInfo = std::make_shared<TGCInfo>(); + NWrappers::NExternalStorage::IExternalStorageOperator::TPtr GetCurrentOperator() const; + virtual TString DoDebugString() const override { + return GetCurrentOperator()->DebugString(); + } +protected: + virtual std::shared_ptr<IBlobsDeclareRemovingAction> DoStartDeclareRemovingAction() override; + virtual std::shared_ptr<IBlobsWritingAction> DoStartWritingAction() override; + virtual std::shared_ptr<IBlobsReadingAction> DoStartReadingAction() override; + virtual bool DoStartGC() override; + virtual bool DoLoad(NColumnShard::IBlobManagerDb& dbBlobs) override { + dbBlobs.LoadTierLists(GetStorageId(), GCInfo->MutableBlobsToDelete(), GCInfo->MutableDraftBlobIdsToRemove()); + return true; + } + virtual void DoOnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& tiers) override; + +public: + TOperator(const TString& storageId, const NColumnShard::TColumnShard& shard, const std::shared_ptr<NWrappers::NExternalStorage::IExternalStorageOperator>& externalOperator); + virtual std::shared_ptr<IBlobInUseTracker> GetBlobsTracker() const override { + return GCInfo; + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/write.cpp b/ydb/core/tx/columnshard/blobs_action/tier/write.cpp new file mode 100644 index 00000000000..871fe629025 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/write.cpp @@ -0,0 +1,44 @@ +#include "write.h" +#include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/wrappers/events/common.h> +#include <contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/include/aws/s3/model/PutObjectRequest.h> + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +void TWriteAction::DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) { + auto awsRequest = Aws::S3::Model::PutObjectRequest().WithKey(blobId.GetLogoBlobId().ToString()); + + TString moveData = data; + auto request = std::make_unique<NWrappers::NExternalStorage::TEvPutObjectRequest>(awsRequest, std::move(moveData)); + auto hRequest = std::make_unique<IEventHandle>(NActors::TActorId(), TActorContext::AsActorContext().SelfID, request.release()); + TAutoPtr<TEventHandle<NWrappers::NExternalStorage::TEvPutObjectRequest>> evPtr((TEventHandle<NWrappers::NExternalStorage::TEvPutObjectRequest>*)hRequest.release()); + ExternalStorageOperator->Execute(evPtr); +} + +void TWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) { + if (success) { + for (auto&& i : GetBlobsForWrite()) { + dbBlobs.RemoveTierDraftBlobId(GetStorageId(), i.first); + } + } else { + for (auto&& i : GetBlobsForWrite()) { + dbBlobs.RemoveTierDraftBlobId(GetStorageId(), i.first); + dbBlobs.AddTierBlobToDelete(GetStorageId(), i.first); + GCInfo->MutableBlobsToDelete().emplace_back(i.first); + } + } +} + +void TWriteAction::DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs) { + for (auto&& i : GetBlobsForWrite()) { + dbBlobs.AddTierDraftBlobId(GetStorageId(), i.first); + } +} + +NKikimr::NOlap::TUnifiedBlobId TWriteAction::AllocateNextBlobId(const TString& data) { + static TAtomic Counter = 0; + auto now = TInstant::Now(); + return TUnifiedBlobId(Max<ui32>(), TLogoBlobID(TabletId, now.GetValue() >> 32, now.GetValue() & Max<ui32>(), TLogoBlobID::MaxChannel, data.size(), AtomicIncrement(Counter) % TLogoBlobID::MaxCookie, 1)); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/write.h b/ydb/core/tx/columnshard/blobs_action/tier/write.h new file mode 100644 index 00000000000..1691e9f7d56 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/write.h @@ -0,0 +1,50 @@ +#pragma once + +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> +#include <ydb/core/tx/columnshard/blob_manager.h> +#include <ydb/core/tx/columnshard/blob_cache.h> +#include <ydb/core/wrappers/abstract.h> +#include "gc_info.h" + +namespace NKikimr::NOlap::NBlobOperations::NTier { + +class TWriteAction: public IBlobsWritingAction { +private: + using TBase = IBlobsWritingAction; + const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr ExternalStorageOperator; + std::shared_ptr<TGCInfo> GCInfo; + const ui64 TabletId; +protected: + virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) override; + + virtual void DoOnBlobWriteResult(const TUnifiedBlobId& /*blobId*/, const NKikimrProto::EReplyStatus status) override { + Y_VERIFY(status == NKikimrProto::EReplyStatus::OK); + } + + virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void DoOnCompleteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/) override { + return; + } + + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool success) override; + virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/) override { + + } +public: + virtual bool NeedDraftTransaction() const override { + return true; + } + + virtual TUnifiedBlobId AllocateNextBlobId(const TString& data) override; + + TWriteAction(const TString& storageId, const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& storageOperator, const ui64 tabletId, const std::shared_ptr<TGCInfo>& gcInfo) + : TBase(storageId) + , ExternalStorageOperator(storageOperator) + , GCInfo(gcInfo) + , TabletId(tabletId) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/tier/ya.make b/ydb/core/tx/columnshard/blobs_action/tier/ya.make new file mode 100644 index 00000000000..e3526a08325 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/tier/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + adapter.cpp + gc.cpp + gc_actor.cpp + gc_info.cpp + write.cpp + read.cpp + storage.cpp +) + +PEERDIR( + ydb/core/protos + contrib/libs/apache/arrow + ydb/core/tablet_flat + ydb/core/tx/tiering +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..6b12ef8893a --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-transaction) +target_link_libraries(columnshard-blobs_action-transaction PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-transaction PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..4314df0f579 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.linux-aarch64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-transaction) +target_link_libraries(columnshard-blobs_action-transaction PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-transaction PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..4314df0f579 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.linux-x86_64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-transaction) +target_link_libraries(columnshard-blobs_action-transaction PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-transaction PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.txt b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.txt new file mode 100644 index 00000000000..f8b31df0c11 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..6b12ef8893a --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/CMakeLists.windows-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(columnshard-blobs_action-transaction) +target_link_libraries(columnshard-blobs_action-transaction PUBLIC + contrib-libs-cxxsupp + yutil + ydb-core-protos + libs-apache-arrow + ydb-core-tablet_flat + core-tx-tiering +) +target_sources(columnshard-blobs_action-transaction PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp +) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp new file mode 100644 index 00000000000..c041f6cd412 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp @@ -0,0 +1,5 @@ +#include "tx_draft.h" + +namespace NKikimr::NColumnShard { + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h new file mode 100644 index 00000000000..a90db85f0f7 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h @@ -0,0 +1,33 @@ +#pragma once +#include <ydb/core/tx/columnshard/columnshard_impl.h> + +namespace NKikimr::NColumnShard { + +using namespace NTabletFlatExecutor; + +class TTxWriteDraft: public TTransactionBase<TColumnShard> { +private: + const IWriteController::TPtr WriteController; +public: + TTxWriteDraft(TColumnShard* self, const IWriteController::TPtr writeController) + : TBase(self) + , WriteController(writeController) { + } + + bool Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) override { + TBlobManagerDb blobManagerDb(txc.DB); + for (auto&& action : WriteController->GetBlobActions()) { + action->OnExecuteTxBeforeWrite(*Self, blobManagerDb); + } + return true; + } + void Complete(const TActorContext& ctx) override { + for (auto&& action : WriteController->GetBlobActions()) { + action->OnCompleteTxBeforeWrite(*Self); + } + ctx.Register(NColumnShard::CreateWriteActor(Self->TabletID(), WriteController, TInstant::Max())); + } + TTxType GetTxType() const override { return TXTYPE_WRITE_DRAFT; } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp new file mode 100644 index 00000000000..6c03ae44a96 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp @@ -0,0 +1,15 @@ +#include "tx_gc_indexed.h" + +namespace NKikimr::NColumnShard { +bool TTxGarbageCollectionFinished::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("tx", "TxGarbageCollectionFinished")("event", "execute"); + TBlobManagerDb blobManagerDb(txc.DB); + Action->OnExecuteTxAfterCleaning(*Self, blobManagerDb); + return true; +} +void TTxGarbageCollectionFinished::Complete(const TActorContext& /*ctx*/) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("tx", "TxGarbageCollectionFinished")("event", "complete"); + Action->OnCompleteTxAfterCleaning(*Self, Action); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.h new file mode 100644 index 00000000000..84846ff318d --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.h @@ -0,0 +1,21 @@ +#pragma once +#include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/tx/columnshard/columnshard_private_events.h> + +namespace NKikimr::NColumnShard { +class TTxGarbageCollectionFinished: public TTransactionBase<TColumnShard> { +private: + std::shared_ptr<NOlap::IBlobsGCAction> Action; +public: + TTxGarbageCollectionFinished(TColumnShard* self, const std::shared_ptr<NOlap::IBlobsGCAction>& action) + : TBase(self) + , Action(action) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + virtual void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return TXTYPE_GC_FINISHED; } +}; + + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp new file mode 100644 index 00000000000..27bb2cb777a --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp @@ -0,0 +1,30 @@ +#include "tx_gc_insert_table.h" + +namespace NKikimr::NColumnShard { + +bool TTxInsertTableCleanup::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { + TBlobGroupSelector dsGroupSelector(Self->Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); + NIceDb::TNiceDb db(txc.DB); + + Self->TryAbortWrites(db, dbTable, std::move(WriteIdsToAbort)); + + TBlobManagerDb blobManagerDb(txc.DB); + auto allAborted = Self->InsertTable->GetAborted(); + auto storage = Self->StoragesManager->GetInsertOperator(); + BlobsAction = storage->StartDeclareRemovingAction(); + for (auto& [abortedWriteId, abortedData] : allAborted) { + Self->InsertTable->EraseAborted(dbTable, abortedData); + Y_VERIFY(abortedData.GetBlobRange().IsFullBlob()); + BlobsAction->DeclareRemove(abortedData.GetBlobRange().GetBlobId()); + } + BlobsAction->OnExecuteTxAfterRemoving(*Self, blobManagerDb, true); + return true; +} +void TTxInsertTableCleanup::Complete(const TActorContext& /*ctx*/) { + Y_VERIFY(BlobsAction); + BlobsAction->OnCompleteTxAfterRemoving(*Self); + Self->EnqueueBackgroundActivities(); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h new file mode 100644 index 00000000000..57274c1f3b7 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h @@ -0,0 +1,25 @@ +#pragma once +#include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/tx/columnshard/columnshard_private_events.h> + +namespace NKikimr::NColumnShard { +class TTxInsertTableCleanup: public TTransactionBase<TColumnShard> { +private: + THashSet<TWriteId> WriteIdsToAbort; + std::shared_ptr<NOlap::IBlobsDeclareRemovingAction> BlobsAction; +public: + TTxInsertTableCleanup(TColumnShard* self, THashSet<TWriteId>&& writeIdsToAbort) + : TBase(self) + , WriteIdsToAbort(std::move(writeIdsToAbort)) { + Y_VERIFY(WriteIdsToAbort.size() || self->InsertTable->GetAborted().size()); + } + + ~TTxInsertTableCleanup() { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + virtual void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return TXTYPE_CLEANUP_INSERT_TABLE; } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp new file mode 100644 index 00000000000..0f25c5b80e4 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp @@ -0,0 +1,90 @@ +#include "tx_write.h" + +namespace NKikimr::NColumnShard { +bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const TEvPrivate::TEvWriteBlobsResult::TPutBlobData& blobData, const TWriteId writeId) { + const NKikimrTxColumnShard::TLogicalMetadata& meta = blobData.GetLogicalMeta(); + + const auto& blobRange = blobData.GetBlobRange(); + Y_VERIFY(blobRange.GetBlobId().IsValid()); + + // First write wins + TBlobGroupSelector dsGroupSelector(Self->Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); + + const auto& writeMeta(PutBlobResult->Get()->GetWriteMeta()); + + auto tableSchema = Self->TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchemaUnsafe(PutBlobResult->Get()->GetSchemaVersion()); + + NOlap::TInsertedData insertData((ui64)writeId, writeMeta.GetTableId(), writeMeta.GetDedupId(), blobRange, meta, tableSchema->GetSnapshot()); + bool ok = Self->InsertTable->Insert(dbTable, std::move(insertData)); + if (ok) { + // Put new data into blob cache + Y_VERIFY(blobRange.IsFullBlob()); + + Self->UpdateInsertTableCounters(); + return true; + } + return false; +} + + +bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { + LOG_S_DEBUG(TxPrefix() << "execute" << TxSuffix()); + + const auto& writeMeta(PutBlobResult->Get()->GetWriteMeta()); + Y_VERIFY(Self->TablesManager.IsReadyForWrite(writeMeta.GetTableId())); + + txc.DB.NoMoreReadsForTx(); + TWriteOperation::TPtr operation; + if (writeMeta.HasLongTxId()) { + Y_VERIFY_S(PutBlobResult->Get()->GetBlobData().size() == 1, TStringBuilder() << "Blobs count: " << PutBlobResult->Get()->GetBlobData().size()); + } else { + operation = Self->OperationsManager.GetOperation((TWriteId)writeMeta.GetWriteId()); + Y_VERIFY(operation); + Y_VERIFY(operation->GetStatus() == EOperationStatus::Started); + } + + TVector<TWriteId> writeIds; + for (auto blobData : PutBlobResult->Get()->GetBlobData()) { + auto writeId = TWriteId(writeMeta.GetWriteId()); + if (operation) { + writeId = Self->BuildNextWriteId(txc); + } else { + NIceDb::TNiceDb db(txc.DB); + writeId = Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId()); + } + + if (!InsertOneBlob(txc, blobData, writeId)) { + LOG_S_DEBUG(TxPrefix() << "duplicate writeId " << (ui64)writeId << TxSuffix()); + Self->IncCounter(COUNTER_WRITE_DUPLICATE); + } + writeIds.push_back(writeId); + } + + TBlobManagerDb blobManagerDb(txc.DB); + for (auto&& i : PutBlobResult->Get()->GetActions()) { + i->OnExecuteTxAfterWrite(*Self, blobManagerDb, true); + } + + if (operation) { + operation->OnWriteFinish(txc, writeIds); + auto txInfo = Self->ProgressTxController.RegisterTxWithDeadline(operation->GetTxId(), NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, "", writeMeta.GetSource(), 0, txc); + Y_UNUSED(txInfo); + NEvents::TDataEvents::TCoordinatorInfo tInfo = Self->ProgressTxController.GetCoordinatorInfo(operation->GetTxId()); + Result = NEvents::TDataEvents::TEvWriteResult::BuildPrepared(operation->GetTxId(), tInfo); + } else { + Y_VERIFY(writeIds.size() == 1); + Result = std::make_unique<TEvColumnShard::TEvWriteResult>(Self->TabletID(), writeMeta, (ui64)writeIds.front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + } + return true; +} + +void TTxWrite::Complete(const TActorContext& ctx) { + Y_VERIFY(Result); + LOG_S_DEBUG(TxPrefix() << "complete" << TxSuffix()); + Self->CSCounters.OnWriteTxComplete((TMonotonic::Now() - PutBlobResult->Get()->GetWriteMeta().GetWriteStartInstant()).MilliSeconds()); + Self->CSCounters.OnSuccessWriteResponse(); + ctx.Send(PutBlobResult->Get()->GetWriteMeta().GetSource(), Result.release()); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h new file mode 100644 index 00000000000..2bb0adaad8a --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h @@ -0,0 +1,35 @@ +#pragma once +#include <ydb/core/tx/columnshard/columnshard_impl.h> + +namespace NKikimr::NColumnShard { + +class TTxWrite : public TTransactionBase<TColumnShard> { +public: + TTxWrite(TColumnShard* self, const TEvPrivate::TEvWriteBlobsResult::TPtr& putBlobResult) + : TBase(self) + , PutBlobResult(putBlobResult) + , TabletTxNo(++Self->TabletTxCounter) + {} + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return TXTYPE_WRITE; } + + bool InsertOneBlob(TTransactionContext& txc, const TEvPrivate::TEvWriteBlobsResult::TPutBlobData& blobData, const TWriteId writeId); + +private: + TEvPrivate::TEvWriteBlobsResult::TPtr PutBlobResult; + const ui32 TabletTxNo; + std::unique_ptr<NActors::IEventBase> Result; + + TStringBuilder TxPrefix() const { + return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; + } + + TString TxSuffix() const { + return TStringBuilder() << " at tablet " << Self->TabletID(); + } +}; + + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp new file mode 100644 index 00000000000..b9ceac5f886 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp @@ -0,0 +1,89 @@ +#include "tx_write_index.h" +#include <ydb/core/tx/columnshard/engines/changes/abstract/abstract.h> +#include <ydb/core/tx/columnshard/hooks/abstract/abstract.h> + +namespace NKikimr::NColumnShard { + +bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) { + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())); + Y_VERIFY(Self->InsertTable); + Y_VERIFY(Self->TablesManager.HasPrimaryIndex()); + txc.DB.NoMoreReadsForTx(); + + auto changes = Ev->Get()->IndexChanges; + ACFL_DEBUG("event", "TTxWriteIndex::Execute")("change_type", changes->TypeString())("details", *changes); + if (Ev->Get()->GetPutStatus() == NKikimrProto::OK) { + NOlap::TSnapshot snapshot(Self->LastPlannedStep, Self->LastPlannedTxId); + Y_VERIFY(Ev->Get()->IndexInfo.GetLastSchema()->GetSnapshot() <= snapshot); + + TBlobGroupSelector dsGroupSelector(Self->Info()); + NOlap::TDbWrapper dbWrap(txc.DB, &dsGroupSelector); + AFL_VERIFY(Self->TablesManager.MutablePrimaryIndex().ApplyChanges(dbWrap, changes, snapshot)); + LOG_S_DEBUG(TxPrefix() << "(" << changes->TypeString() << ") apply" << TxSuffix()); + NOlap::TWriteIndexContext context(txc, dbWrap); + changes->WriteIndex(*Self, context); + + changes->GetBlobsAction().OnExecuteTxAfterAction(*Self, *context.BlobManagerDb, true); + + Self->UpdateIndexCounters(); + } else { + TBlobGroupSelector dsGroupSelector(Self->Info()); + NColumnShard::TBlobManagerDb blobsDb(txc.DB); + changes->GetBlobsAction().OnExecuteTxAfterAction(*Self, blobsDb, false); + for (ui32 i = 0; i < changes->GetWritePortionsCount(); ++i) { + for (auto&& i : changes->GetWritePortionInfo(i)->GetPortionInfo().Records) { + LOG_S_WARN(TxPrefix() << "(" << changes->TypeString() << ":" << i.BlobRange << ") blob cannot apply changes: " << TxSuffix()); + } + } + NOlap::TChangesFinishContext context("cannot write index blobs"); + changes->Abort(*Self, context); + LOG_S_ERROR(TxPrefix() << " (" << changes->TypeString() << ") cannot write index blobs" << TxSuffix()); + } + + Self->EnqueueProgressTx(ctx); + return true; +} + +void TTxWriteIndex::Complete(const TActorContext& ctx) { + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())); + CompleteReady = true; + auto changes = Ev->Get()->IndexChanges; + ACFL_DEBUG("event", "TTxWriteIndex::Complete")("change_type", changes->TypeString())("details", *changes); + + const ui64 blobsWritten = changes->GetBlobsAction().GetWritingBlobsCount(); + const ui64 bytesWritten = changes->GetBlobsAction().GetWritingTotalSize(); + + if (!Ev->Get()->IndexChanges->IsAborted()) { + NOlap::TWriteIndexCompleteContext context(ctx, blobsWritten, bytesWritten, Ev->Get()->Duration, TriggerActivity); + Ev->Get()->IndexChanges->WriteIndexComplete(*Self, context); + } + + if (Ev->Get()->GetPutStatus() == NKikimrProto::TRYLATER) { + ctx.Schedule(Self->FailActivationDelay, new TEvPrivate::TEvPeriodicWakeup(true)); + } else { + Self->EnqueueBackgroundActivities(false, TriggerActivity); + } + + Self->UpdateResourceMetrics(ctx, Ev->Get()->PutResult->GetResourceUsage()); + changes->GetBlobsAction().OnCompleteTxAfterAction(*Self); + NYDBTest::TControllers::GetColumnShardController()->OnWriteIndexComplete(Self->TabletID(), changes->TypeString()); +} + +TTxWriteIndex::~TTxWriteIndex() { + if (Ev) { + auto changes = Ev->Get()->IndexChanges; + if (!CompleteReady && changes) { + changes->AbortEmergency(); + } + } +} + +TTxWriteIndex::TTxWriteIndex(TColumnShard* self, TEvPrivate::TEvWriteIndex::TPtr& ev) + : TBase(self) + , Ev(ev) + , TabletTxNo(++Self->TabletTxCounter) +{ + Y_VERIFY(Ev && Ev->Get()->IndexChanges); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h new file mode 100644 index 00000000000..09922121d15 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h @@ -0,0 +1,36 @@ +#pragma once +#include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/tx/columnshard/columnshard_private_events.h> + +namespace NKikimr::NColumnShard { + +/// Common transaction for WriteIndex and GranuleCompaction. +/// For WriteIndex it writes new portion from InsertTable into index. +/// For GranuleCompaction it writes new portion of indexed data and mark old data with "switching" snapshot. +class TTxWriteIndex: public TTransactionBase<TColumnShard> { +public: + TTxWriteIndex(TColumnShard* self, TEvPrivate::TEvWriteIndex::TPtr& ev); + + ~TTxWriteIndex(); + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return TXTYPE_WRITE_INDEX; } + +private: + + TEvPrivate::TEvWriteIndex::TPtr Ev; + const ui32 TabletTxNo; + TBackgroundActivity TriggerActivity = TBackgroundActivity::All(); + bool CompleteReady = false; + + TStringBuilder TxPrefix() const { + return TStringBuilder() << "TxWriteIndex[" << ToString(TabletTxNo) << "] "; + } + + TString TxSuffix() const { + return TStringBuilder() << " at tablet " << Self->TabletID(); + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/ya.make b/ydb/core/tx/columnshard/blobs_action/transaction/ya.make new file mode 100644 index 00000000000..27268e5fd7f --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + tx_draft.cpp + tx_write.cpp + tx_write_index.cpp + tx_gc_insert_table.cpp + tx_gc_indexed.cpp +) + +PEERDIR( + ydb/core/protos + contrib/libs/apache/arrow + ydb/core/tablet_flat + ydb/core/tx/tiering +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/ya.make b/ydb/core/tx/columnshard/blobs_action/ya.make index 0993365a187..9775d5cb7e2 100644 --- a/ydb/core/tx/columnshard/blobs_action/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/ya.make @@ -1,9 +1,8 @@ LIBRARY() SRCS( - abstract.cpp - bs.cpp blob_manager_db.cpp + memory.cpp ) PEERDIR( @@ -11,6 +10,19 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tablet_flat ydb/core/tx/tiering + ydb/core/tx/columnshard/blobs_action/bs + ydb/core/tx/columnshard/blobs_action/abstract + ydb/core/tx/columnshard/blobs_action/transaction ) +IF (OS_WINDOWS) + CFLAGS( + -DKIKIMR_DISABLE_S3_OPS + ) +ELSE() + PEERDIR( + ydb/core/tx/columnshard/blobs_action/tier + ) +ENDIF() + END() diff --git a/ydb/core/tx/columnshard/blobs_reader/actor.cpp b/ydb/core/tx/columnshard/blobs_reader/actor.cpp index 3da02504a47..ec1f0626e46 100644 --- a/ydb/core/tx/columnshard/blobs_reader/actor.cpp +++ b/ydb/core/tx/columnshard/blobs_reader/actor.cpp @@ -2,25 +2,37 @@ namespace NKikimr::NOlap::NBlobOperations::NRead { +TAtomicCounter TActor::WaitingBlobsCount = 0; + void TActor::Handle(TEvStartReadTask::TPtr& ev) { - ACFL_DEBUG("event", "TEvReadTask"); - Y_VERIFY(ev->Get()->GetTask()); - for (auto&& [uBlobId, ranges] : ev->Get()->GetTask()->GetBlobsGroupped()) { - for (auto&& bRange : ranges) { - BlobTasks[bRange].emplace_back(ev->Get()->GetTask()); + THashSet<TBlobRange> rangesInProgress; + for (auto&& agent : ev->Get()->GetTask()->GetAgents()) { + for (auto&& b : agent->GetRangesForRead()) { + for (auto&& r : b.second) { + auto it = BlobTasks.find(r); + if (it != BlobTasks.end()) { + ACFL_DEBUG("event", "TEvReadTask")("enqueued_blob_id", r); + rangesInProgress.emplace(r); + } else { + ACFL_TRACE("event", "TEvReadTask")("blob_id", r); + it = BlobTasks.emplace(r, std::vector<std::shared_ptr<ITask>>()).first; + WaitingBlobsCount.Inc(); + } + it->second.emplace_back(ev->Get()->GetTask()); + } } - NBlobCache::TReadBlobRangeOptions readOpts{.CacheAfterRead = false, .ForceFallback = false, .IsBackgroud = true, .WithDeadline = true}; - Send(BlobCacheActorId, new NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch(std::move(ranges), std::move(readOpts))); } - ev->Get()->GetTask()->StartBlobsFetching(); + ev->Get()->GetTask()->StartBlobsFetching(rangesInProgress); + ACFL_DEBUG("task", ev->Get()->GetTask()->DebugString()); + AFL_VERIFY(ev->Get()->GetTask()->GetExpectedBlobsSize()); } void TActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev) { - ACFL_TRACE("event", "TEvReadBlobRangeResult"); + ACFL_TRACE("event", "TEvReadBlobRangeResult")("blob_id", ev->Get()->BlobRange); auto& event = *ev->Get(); auto it = BlobTasks.find(event.BlobRange); - Y_VERIFY(it != BlobTasks.end()); + AFL_VERIFY(it != BlobTasks.end())("blob_id", event.BlobRange); for (auto&& i : it->second) { if (event.Status != NKikimrProto::EReplyStatus::OK) { i->AddError(event.BlobRange, ITask::TErrorStatus::Fail(event.Status, "cannot get blob")); @@ -28,6 +40,7 @@ void TActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev) i->AddData(event.BlobRange, event.Data); } } + WaitingBlobsCount.Dec(); BlobTasks.erase(it); } diff --git a/ydb/core/tx/columnshard/blobs_reader/actor.h b/ydb/core/tx/columnshard/blobs_reader/actor.h index 59e807fe332..c03cc1e49b0 100644 --- a/ydb/core/tx/columnshard/blobs_reader/actor.h +++ b/ydb/core/tx/columnshard/blobs_reader/actor.h @@ -16,8 +16,8 @@ private: NActors::TActorId Parent; NActors::TActorId BlobCacheActorId; THashMap<TBlobRange, std::vector<std::shared_ptr<ITask>>> BlobTasks; - public: + static TAtomicCounter WaitingBlobsCount; TActor(ui64 tabletId, const TActorId& parent); void Handle(TEvStartReadTask::TPtr& ev); diff --git a/ydb/core/tx/columnshard/blobs_reader/task.cpp b/ydb/core/tx/columnshard/blobs_reader/task.cpp index 1e964b28659..20c7c3ec36e 100644 --- a/ydb/core/tx/columnshard/blobs_reader/task.cpp +++ b/ydb/core/tx/columnshard/blobs_reader/task.cpp @@ -3,13 +3,9 @@ namespace NKikimr::NOlap::NBlobOperations::NRead { -THashMap<TUnifiedBlobId, std::vector<TBlobRange>> ITask::GetBlobsGroupped() const { +const std::vector<std::shared_ptr<IBlobsReadingAction>>& ITask::GetAgents() const { Y_VERIFY(!BlobsFetchingStarted); - THashMap<TUnifiedBlobId, std::vector<TBlobRange>> result; - for (auto&& i : BlobsWaiting) { - result[i.BlobId].emplace_back(i); - } - return result; + return Agents; } bool ITask::AddError(const TBlobRange& range, const TErrorStatus& status) { @@ -19,7 +15,13 @@ bool ITask::AddError(const TBlobRange& range, const TErrorStatus& status) { } else { ACFL_ERROR("event", "NewError")("message", status.GetErrorMessage())("status", status.GetStatus()); } - Y_VERIFY(BlobsWaiting.erase(range)); + { + auto it = BlobsWaiting.find(range); + AFL_VERIFY(it != BlobsWaiting.end()); + it->second->OnReadError(range, status.GetStatus()); + BlobsWaiting.erase(it); + } + Y_VERIFY(BlobErrors.emplace(range, status).second); if (!OnError(range)) { TaskFinishedWithError = true; @@ -36,27 +38,72 @@ void ITask::AddData(const TBlobRange& range, const TString& data) { ACFL_WARN("event", "SkipDataAfterError"); return; } else { - ACFL_DEBUG("event", "NewData")("range", range.ToString()); + ACFL_TRACE("event", "NewData")("range", range.ToString()); } Y_VERIFY(BlobsFetchingStarted); - Y_VERIFY(BlobsWaiting.erase(range)); + { + auto it = BlobsWaiting.find(range); + AFL_VERIFY(it != BlobsWaiting.end()); + it->second->OnReadResult(range, data); + BlobsWaiting.erase(it); + } Y_VERIFY(BlobsData.emplace(range, data).second); if (BlobsWaiting.empty()) { OnDataReady(); } } -void ITask::StartBlobsFetching() { +void ITask::StartBlobsFetching(const THashSet<TBlobRange>& rangesInProgress) { + Y_VERIFY(!BlobsFetchingStarted); BlobsFetchingStarted = true; + for (auto&& agent : Agents) { + for (auto&& b : agent->GetRangesForRead()) { + for (auto&& r : b.second) { + BlobsWaiting.emplace(r, agent); + } + } + agent->Start(rangesInProgress); + } if (BlobsWaiting.empty()) { OnDataReady(); } } -ITask::ITask(const THashSet<TBlobRange>& blobs) - : BlobsWaiting(blobs) +namespace { +TAtomicCounter TaskIdentifierBuilder = 0; +} + +ITask::ITask(const std::vector<std::shared_ptr<IBlobsReadingAction>>& actions) + : Agents(actions) + , TaskIdentifier(TaskIdentifierBuilder.Inc()) { - Y_VERIFY(BlobsWaiting.size()); + AFL_VERIFY(Agents.size()); + for (auto&& i : Agents) { + AFL_VERIFY(i->GetExpectedBlobsCount()); + } +} + +TString ITask::DebugString() const { + TStringBuilder sb; + sb << "finished_with_error=" << TaskFinishedWithError << ";" + << "errors=" << BlobErrors.size() << ";" + << "data=" << BlobsData.size() << ";" + << "waiting=" << BlobsWaiting.size() << ";" + << "additional_info=(" << DoDebugString() << ");" + ; + return sb; +} + +void ITask::OnDataReady() { + ACFL_DEBUG("event", "OnDataReady")("task", DebugString()); + Y_VERIFY(!DataIsReadyFlag); + DataIsReadyFlag = true; + DoOnDataReady(); +} + +bool ITask::OnError(const TBlobRange& range) { + ACFL_DEBUG("event", "OnError")("task", DebugString()); + return DoOnError(range); } } diff --git a/ydb/core/tx/columnshard/blobs_reader/task.h b/ydb/core/tx/columnshard/blobs_reader/task.h index d5a3becd3db..a1706690d99 100644 --- a/ydb/core/tx/columnshard/blobs_reader/task.h +++ b/ydb/core/tx/columnshard/blobs_reader/task.h @@ -2,6 +2,7 @@ #include <ydb/library/accessor/accessor.h> #include <ydb/library/conclusion/status.h> #include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/read.h> #include <ydb/core/protos/base.pb.h> namespace NKikimr::NOlap::NBlobOperations::NRead { @@ -10,13 +11,19 @@ class ITask { public: using TErrorStatus = TConclusionSpecialStatus<NKikimrProto::EReplyStatus, NKikimrProto::EReplyStatus::OK, NKikimrProto::EReplyStatus::ERROR>; private: - YDB_READONLY_DEF(THashSet<TBlobRange>, BlobsWaiting); + THashMap<TBlobRange, std::shared_ptr<IBlobsReadingAction>> BlobsWaiting; + std::vector<std::shared_ptr<IBlobsReadingAction>> Agents; THashMap<TBlobRange, TString> BlobsData; THashMap<TBlobRange, TErrorStatus> BlobErrors; bool BlobsFetchingStarted = false; bool TaskFinishedWithError = false; bool DataIsReadyFlag = false; + const ui64 TaskIdentifier = 0; protected: + bool IsFetchingStarted() const { + return BlobsFetchingStarted; + } + const THashMap<TBlobRange, TString>& GetBlobsData() const { return BlobsData; } @@ -28,26 +35,52 @@ protected: virtual void DoOnDataReady() = 0; virtual bool DoOnError(const TBlobRange& range) = 0; - void OnDataReady() { - Y_VERIFY(!DataIsReadyFlag); - DataIsReadyFlag = true; - DoOnDataReady(); + void OnDataReady(); + bool OnError(const TBlobRange& range); + + virtual TString DoDebugString() const { + return ""; } +public: + ui64 GetTaskIdentifier() const { + return TaskIdentifier; + } + + TString DebugString() const; - bool OnError(const TBlobRange& range) { - return DoOnError(range); + ui64 GetExpectedBlobsSize() const { + ui64 result = 0; + for (auto&& i : BlobsWaiting) { + result += i.second->GetExpectedBlobsSize(); + } + return result; } -public: - THashMap<TUnifiedBlobId, std::vector<TBlobRange>> GetBlobsGroupped() const; + ui64 GetExpectedBlobsCount() const { + ui64 result = 0; + for (auto&& i : BlobsWaiting) { + result += i.second->GetExpectedBlobsCount(); + } + return result; + } + + THashSet<TBlobRange> GetExpectedRanges() const { + THashSet<TBlobRange> result; + for (auto&& i : BlobsWaiting) { + i.second->FillExpectedRanges(result); + } + return result; + } + + const std::vector<std::shared_ptr<IBlobsReadingAction>>& GetAgents() const; virtual ~ITask() { Y_VERIFY(DataIsReadyFlag || TaskFinishedWithError); } - ITask(const THashSet<TBlobRange>& blobs); + ITask(const std::vector<std::shared_ptr<IBlobsReadingAction>>& actions); - void StartBlobsFetching(); + void StartBlobsFetching(const THashSet<TBlobRange>& rangesInProgress); bool AddError(const TBlobRange& range, const TErrorStatus& status); void AddData(const TBlobRange& range, const TString& data); diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index 0ac565826e7..0237cdcf689 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -1,5 +1,6 @@ #include "columnshard_impl.h" #include "blobs_reader/actor.h" +#include "hooks/abstract/abstract.h" namespace NKikimr { @@ -31,6 +32,7 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { LOG_S_INFO("Switched to work at " << TabletID() << " actor " << ctx.SelfID); BlobsReadActor = ctx.Register(new NOlap::NBlobOperations::NRead::TActor(TabletID(), SelfId())); + for (auto&& i : TablesManager.GetTables()) { ActivateTiering(i.first, i.second.GetTieringUsage()); } @@ -40,15 +42,29 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { LOG_S_DEBUG("OnActivateExecutor at " << TabletID() << " actor " << ctx.SelfID); Executor()->RegisterExternalTabletCounters(TabletCountersPtr.release()); - BlobManager = std::make_unique<TBlobManager>(Info(), Executor()->Generation()); - auto& icb = *AppData(ctx)->Icb; - BlobManager->RegisterControls(icb); - Limits.RegisterControls(icb); - CompactionLimits.RegisterControls(icb); - Settings.RegisterControls(icb); + const auto selfActorId = SelfId(); + Tiers = std::make_shared<TTiersManager>(TabletID(), SelfId(), + [selfActorId](const TActorContext& ctx) { + ctx.Send(selfActorId, new TEvPrivate::TEvTieringModified); + }); + Tiers->Start(Tiers); + if (!NMetadata::NProvider::TServiceOperator::IsEnabled()) { + Tiers->TakeConfigs(NYDBTest::TControllers::GetColumnShardController()->GetFallbackTiersSnapshot(), nullptr); + } +} - Execute(CreateTxInitSchema(), ctx); +void TColumnShard::Handle(TEvPrivate::TEvTieringModified::TPtr& /*ev*/, const TActorContext& ctx) { + OnTieringModified(); + if (!TiersInitializedFlag) { + TiersInitializedFlag = true; + auto& icb = *AppData(ctx)->Icb; + Limits.RegisterControls(icb); + CompactionLimits.RegisterControls(icb); + Settings.RegisterControls(icb); + Execute(CreateTxInitSchema(), ctx); + } + NYDBTest::TControllers::GetColumnShardController()->OnTieringModified(Tiers); } void TColumnShard::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext&) { @@ -108,7 +124,7 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon Y_UNUSED(ctx); ui64 readCookie = ev->Get()->RequestCookie; LOG_S_DEBUG("Finished read cookie: " << readCookie << " at tablet " << TabletID()); - auto blobs = InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie, *BlobManager); + InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie); ui64 txId = ev->Get()->TxId; if (ScanTxInFlight.contains(txId)) { @@ -117,11 +133,6 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon ScanTxInFlight.erase(txId); SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); } - - if (blobs.size()) { - // Cleanup just freed blobs (dropped exported ones) - CleanForgottenBlobs(ctx, blobs); - } } void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) { @@ -166,18 +177,6 @@ void TColumnShard::Handle(TEvMediatorTimecast::TEvNotifyPlanStep::TPtr& ev, cons EnqueueBackgroundActivities(true); } -void TColumnShard::UpdateBlobMangerCounters() { - const auto counters = BlobManager->GetCountersUpdate(); - IncCounter(COUNTER_BLOB_MANAGER_GC_REQUESTS, counters.GcRequestsSent); - IncCounter(COUNTER_BLOB_MANAGER_KEEP_BLOBS, counters.BlobKeepEntries); - IncCounter(COUNTER_BLOB_MANAGER_DONT_KEEP_BLOBS, counters.BlobDontKeepEntries); - IncCounter(COUNTER_BLOB_MANAGER_SKIPPED_BLOBS, counters.BlobSkippedEntries); - IncCounter(COUNTER_SMALL_BLOB_WRITE_COUNT, counters.SmallBlobsWritten); - IncCounter(COUNTER_SMALL_BLOB_WRITE_BYTES, counters.SmallBlobsBytesWritten); - IncCounter(COUNTER_SMALL_BLOB_DELETE_COUNT, counters.SmallBlobsDeleted); - IncCounter(COUNTER_SMALL_BLOB_DELETE_BYTES, counters.SmallBlobsBytesDeleted); -} - void TColumnShard::UpdateInsertTableCounters() { auto& prepared = InsertTable->GetCountersPrepared(); auto& committed = InsertTable->GetCountersCommitted(); diff --git a/ydb/core/tx/columnshard/columnshard.h b/ydb/core/tx/columnshard/columnshard.h index 256f9358b5c..e6ce65b4a10 100644 --- a/ydb/core/tx/columnshard/columnshard.h +++ b/ydb/core/tx/columnshard/columnshard.h @@ -184,50 +184,6 @@ struct TEvColumnShard { } }; - // Fallback read BlobCache read to tablet (small blobs or S3) - struct TEvReadBlobRanges : public TEventPB<TEvReadBlobRanges, - NKikimrTxColumnShard::TEvReadBlobRanges, - TEvColumnShard::EvReadBlobRanges> - { - std::vector<NOlap::TBlobRange> BlobRanges; - - TEvReadBlobRanges() = default; - - TEvReadBlobRanges(const std::vector<NOlap::TBlobRange>& blobRanges) - : BlobRanges(blobRanges) - { - for (const auto& r : BlobRanges) { - auto* range = Record.AddBlobRanges(); - range->SetBlobId(r.BlobId.ToStringNew()); - range->SetOffset(r.Offset); - range->SetSize(r.Size); - } - } - - void RestoreFromProto(NColumnShard::TBlobGroupSelector* dsGroupSelector, TString& errString) { - BlobRanges.clear(); - BlobRanges.reserve(Record.BlobRangesSize()); - - for (const auto& range : Record.GetBlobRanges()) { - auto blobId = NOlap::TUnifiedBlobId::ParseFromString(range.GetBlobId(), dsGroupSelector, - errString); - if (!errString.empty()) { - return; - } - BlobRanges.push_back(NOlap::TBlobRange{blobId, (ui32)range.GetOffset(), (ui32)range.GetSize()}); - } - } - }; - - struct TEvReadBlobRangesResult : public TEventPB<TEvReadBlobRangesResult, - NKikimrTxColumnShard::TEvReadBlobRangesResult, - TEvColumnShard::EvReadBlobRangesResult> - { - explicit TEvReadBlobRangesResult(ui64 tabletId = 0) { - Record.SetTabletId(tabletId); - } - }; - struct TEvWrite : public TEventPB<TEvWrite, NKikimrTxColumnShard::TEvWrite, TEvColumnShard::EvWrite> { TEvWrite() = default; diff --git a/ydb/core/tx/columnshard/columnshard__export.cpp b/ydb/core/tx/columnshard/columnshard__export.cpp deleted file mode 100644 index d5a1d740b1f..00000000000 --- a/ydb/core/tx/columnshard/columnshard__export.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include "columnshard_impl.h" -#include "columnshard_schema.h" -#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> - -namespace NKikimr::NColumnShard { - -using namespace NTabletFlatExecutor; - -class TTxExportFinish: public TTransactionBase<TColumnShard> { -public: - TTxExportFinish(TColumnShard* self, TEvPrivate::TEvExport::TPtr& ev) - : TBase(self) - , Ev(ev) - , TabletTxNo(++Self->TabletTxCounter) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_EXPORT; } - -private: - TEvPrivate::TEvExport::TPtr Ev; - const ui32 TabletTxNo; - THashMap<TString, THashSet<NOlap::TEvictedBlob>> BlobsToForget; - - TStringBuilder TxPrefix() const { - return TStringBuilder() << "TxExportFinish[" << ToString(TabletTxNo) << "] "; - } - - TString TxSuffix() const { - return TStringBuilder() << " at tablet " << Self->TabletID(); - } -}; - - -bool TTxExportFinish::Execute(TTransactionContext& txc, const TActorContext&) { - Y_VERIFY(Ev); - LOG_S_DEBUG(TxPrefix() << "execute" << TxSuffix()); - - txc.DB.NoMoreReadsForTx(); - //NIceDb::TNiceDb db(txc.DB); - - auto& msg = *Ev->Get(); - auto status = msg.Status; - - { - TBlobManagerDb blobManagerDb(txc.DB); - - for (auto& [blob, externId] : msg.SrcToDstBlobs) { - auto& blobId = blob; - Y_VERIFY(blobId.IsDsBlob()); - Y_VERIFY(externId.IsS3Blob()); - bool dropped = false; - - if (!msg.Blobs.contains(blobId)) { - Y_VERIFY(!msg.ErrorStrings.empty()); - continue; // not exported - } - - // TODO: SELF_CACHED logic - - NOlap::TEvictedBlob evict{ - .State = EEvictState::EXTERN, - .Blob = blobId, - .ExternBlob = externId - }; - bool present = Self->BlobManager->UpdateOneToOne(evict, blobManagerDb, dropped); - - // Delayed erase of evicted blob. Blob could be already deleted. - if (present && !dropped) { - LOG_S_INFO(TxPrefix() << "Blob exported '" << blobId << "'" << TxSuffix()); - Self->BlobManager->DeleteBlob(blobId, blobManagerDb); - Self->IncCounter(COUNTER_BLOBS_ERASED); - Self->IncCounter(COUNTER_BYTES_ERASED, blobId.BlobSize()); - } else if (present && dropped) { - LOG_S_INFO(TxPrefix() << "Stale blob exported '" << blobId << "'" << TxSuffix()); - - TEvictMetadata meta; - evict = Self->BlobManager->GetDropped(blobId, meta); - Y_VERIFY(evict.State == EEvictState::EXTERN); - - BlobsToForget[meta.GetTierName()].emplace(std::move(evict)); - } else { - LOG_S_ERROR(TxPrefix() << "Blob not exported '" << blobId << "'" << TxSuffix()); - } - } - } - - if (status == NKikimrProto::OK) { - Self->IncCounter(COUNTER_EXPORT_SUCCESS); - } else { - Self->IncCounter(COUNTER_EXPORT_FAIL); - } - - return true; -} - -void TTxExportFinish::Complete(const TActorContext& ctx) { - Y_VERIFY(Ev); - LOG_S_DEBUG(TxPrefix() << "complete" << TxSuffix()); - - if (!BlobsToForget.empty()) { - Self->ForgetBlobs(ctx, BlobsToForget); - } -} - - -void TColumnShard::Handle(TEvPrivate::TEvExport::TPtr& ev, const TActorContext& ctx) { - auto& msg = *ev->Get(); - auto status = msg.Status; - Y_VERIFY(status != NKikimrProto::UNKNOWN); - - ui64 exportNo = msg.ExportNo; - auto& tierName = msg.TierName; - - if (status == NKikimrProto::ERROR && msg.Blobs.empty()) { - LOG_S_WARN("Export (fail): id " << exportNo << " tier '" << tierName << "' error: " - << ev->Get()->SerializeErrorsToString() << "' at tablet " << TabletID()); - } else { - // There's no atomicity needed here. Allow partial export - if (status == NKikimrProto::ERROR) { - LOG_S_WARN("Export (partial): id " << exportNo << " tier '" << tierName << "' error: " - << ev->Get()->SerializeErrorsToString() << "' at tablet " << TabletID()); - } else { - LOG_S_DEBUG("Export (apply): id " << exportNo << " tier '" << tierName << "' at tablet " << TabletID()); - } - Execute(new TTxExportFinish(this, ev), ctx); - } -} - -} diff --git a/ydb/core/tx/columnshard/columnshard__forget.cpp b/ydb/core/tx/columnshard/columnshard__forget.cpp deleted file mode 100644 index 9843849c7a8..00000000000 --- a/ydb/core/tx/columnshard/columnshard__forget.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "columnshard_impl.h" -#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> - -namespace NKikimr::NColumnShard { - -using namespace NTabletFlatExecutor; - -class TTxForget : public TTransactionBase<TColumnShard> { -public: - TTxForget(TColumnShard* self, TEvPrivate::TEvForget::TPtr& ev) - : TBase(self) - , Ev(ev) - , TabletTxNo(++Self->TabletTxCounter) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_FORGET; } - -private: - TEvPrivate::TEvForget::TPtr Ev; - const ui32 TabletTxNo; - - TStringBuilder TxPrefix() const { - return TStringBuilder() << "TxForget[" << ToString(TabletTxNo) << "] "; - } - - TString TxSuffix() const { - return TStringBuilder() << " at tablet " << Self->TabletID(); - } -}; - - -bool TTxForget::Execute(TTransactionContext& txc, const TActorContext&) { - Y_VERIFY(Ev); - LOG_S_DEBUG(TxPrefix() << "execute" << TxSuffix()); - - txc.DB.NoMoreReadsForTx(); - //NIceDb::TNiceDb db(txc.DB); - - auto& msg = *Ev->Get(); - auto status = msg.Status; - - if (status == NKikimrProto::OK) { - TBlobManagerDb blobManagerDb(txc.DB); - - TString strBlobs; - TString unknownBlobs; - for (auto& evict : msg.Evicted) { - bool erased = Self->BlobManager->EraseOneToOne(evict, blobManagerDb); - if (erased) { - strBlobs += "'" + evict.Blob.ToStringNew() + "' "; - } else { - unknownBlobs += "'" + evict.Blob.ToStringNew() + "' "; - } - } - LOG_S_INFO(TxPrefix() << "forget evicted blobs " << strBlobs - << (unknownBlobs.size() ? ", forget unknown blobs " : "") << unknownBlobs << TxSuffix()); - - Self->IncCounter(COUNTER_FORGET_SUCCESS); - } else { - Self->IncCounter(COUNTER_FORGET_FAIL); - } - - return true; -} - -void TTxForget::Complete(const TActorContext&) { - LOG_S_DEBUG(TxPrefix() << "complete" << TxSuffix()); -} - - -void TColumnShard::Handle(TEvPrivate::TEvForget::TPtr& ev, const TActorContext& ctx) { - auto status = ev->Get()->Status; - bool error = status == NKikimrProto::ERROR; - - if (error) { - LOG_S_WARN("Forget (fail): '" << ev->Get()->ErrorStr << "' at tablet " << TabletID()); - } else if (status == NKikimrProto::OK) { - LOG_S_DEBUG("Forget (apply) at tablet " << TabletID()); - - Execute(new TTxForget(this, ev), ctx); - } else { - Y_VERIFY(false); - } -} - -} diff --git a/ydb/core/tx/columnshard/columnshard__index_scan.cpp b/ydb/core/tx/columnshard/columnshard__index_scan.cpp index 5465ed35fa9..240230457c9 100644 --- a/ydb/core/tx/columnshard/columnshard__index_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__index_scan.cpp @@ -17,17 +17,13 @@ TColumnShardScanIterator::TColumnShardScanIterator(NOlap::TReadMetadata::TConstP } } -void TColumnShardScanIterator::AddData(const TBlobRange& blobRange, TString data) { - IndexedData->AddData(blobRange, data); -} - std::optional<NOlap::TPartialReadResult> TColumnShardScanIterator::GetBatch() { FillReadyResults(); return ReadyResults.pop_front(); } -std::optional<NBlobCache::TBlobRange> TColumnShardScanIterator::GetNextBlobToRead() { - return IndexedData->ExtractNextBlob(ReadyResults.size()); +std::shared_ptr<NOlap::NBlobOperations::NRead::ITask> TColumnShardScanIterator::GetNextTaskToRead() { + return IndexedData->ExtractNextReadTask(ReadyResults.size()); } void TColumnShardScanIterator::FillReadyResults() { @@ -50,14 +46,6 @@ void TColumnShardScanIterator::FillReadyResults() { if (limitLeft == 0) { IndexedData->Abort(); } - - if (IndexedData->IsFinished()) { - Context.MutableProcessor().Stop(); - } -} - -bool TColumnShardScanIterator::HasWaitingTasks() const { - return Context.GetProcessor().InWaiting(); } TColumnShardScanIterator::~TColumnShardScanIterator() { @@ -66,10 +54,9 @@ TColumnShardScanIterator::~TColumnShardScanIterator() { } void TColumnShardScanIterator::Apply(IDataTasksProcessor::ITask::TPtr task) { - if (!task->IsDataProcessed() || Context.GetProcessor().IsStopped() || !task->IsSameProcessor(Context.GetProcessor()) || IndexedData->IsFinished()) { - return; + if (!IndexedData->IsFinished()) { + Y_VERIFY(task->Apply(*IndexedData)); } - Y_VERIFY(task->Apply(*IndexedData)); } } diff --git a/ydb/core/tx/columnshard/columnshard__index_scan.h b/ydb/core/tx/columnshard/columnshard__index_scan.h index 09fb17f9db1..905d31fb7e6 100644 --- a/ydb/core/tx/columnshard/columnshard__index_scan.h +++ b/ydb/core/tx/columnshard/columnshard__index_scan.h @@ -96,17 +96,13 @@ public: virtual void Apply(IDataTasksProcessor::ITask::TPtr task) override; - virtual bool HasWaitingTasks() const override; - - void AddData(const TBlobRange& blobRange, TString data) override; - bool Finished() const override { return IndexedData->IsFinished() && ReadyResults.empty(); } std::optional<NOlap::TPartialReadResult> GetBatch() override; - std::optional<NBlobCache::TBlobRange> GetNextBlobToRead() override; + virtual std::shared_ptr<NOlap::NBlobOperations::NRead::ITask> GetNextTaskToRead() override; private: void FillReadyResults(); diff --git a/ydb/core/tx/columnshard/columnshard__init.cpp b/ydb/core/tx/columnshard/columnshard__init.cpp index 60f48f22162..86cb05fe5b9 100644 --- a/ydb/core/tx/columnshard/columnshard__init.cpp +++ b/ydb/core/tx/columnshard/columnshard__init.cpp @@ -130,44 +130,17 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) THashSet<TUnifiedBlobId> lostEvictions; TBlobManagerDb blobManagerDb(txc.DB); - // Initialize the BlobManager - { - if (!Self->BlobManager->LoadState(blobManagerDb)) { - return false; - } - if (!Self->BlobManager->LoadOneToOneExport(blobManagerDb, lostEvictions)) { + for (auto&& i : Self->StoragesManager->GetStorages()) { + if (!i.second->Load(blobManagerDb)) { return false; } } + if (!Self->TablesManager.LoadIndex(dbTable, lostEvictions)) { return false; } - // Set dropped evicting records to be erased in future cleanups - TString strBlobs; - for (auto& blobId : lostEvictions) { - TEvictMetadata meta; - auto evict = Self->BlobManager->GetDropped(blobId, meta); - Y_VERIFY(evict.State == EEvictState::EVICTING); - evict.State = EEvictState::ERASING; - - if (meta.GetTierName().empty()) { - LOG_S_ERROR("Blob " << evict.Blob << " eviction with empty tier name at tablet " << Self->TabletID()); - } - - bool dropped; - bool present = Self->BlobManager->UpdateOneToOne(evict, blobManagerDb, dropped); - if (present) { - strBlobs += "'" + evict.Blob.ToStringNew() + "' "; - } else { - LOG_S_ERROR("Unknown dropped evicting blob " << evict.Blob << " at tablet " << Self->TabletID()); - } - } - if (!strBlobs.empty()) { - LOG_S_NOTICE("Erasing potentially exported blobs " << strBlobs << "at tablet " << Self->TabletID()); - } - Self->UpdateInsertTableCounters(); Self->UpdateIndexCounters(); Self->UpdateResourceMetrics(ctx, {}); diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp index 57d970cafe3..4c86113e2bd 100644 --- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp +++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp @@ -160,19 +160,7 @@ public: ctx.Send(res.TxInfo.Source, event.release(), 0, res.TxInfo.Cookie); } - Self->ScheduleNextGC(ctx); - - switch (Trigger) { - case ETriggerActivities::POST_INSERT: - Self->EnqueueBackgroundActivities(false, TBackgroundActivity::Indexation()); - break; - case ETriggerActivities::POST_SCHEMA: - Self->EnqueueBackgroundActivities(); - break; - case ETriggerActivities::NONE: - default: - break; - } + Self->EnqueueBackgroundActivities(); } private: diff --git a/ydb/core/tx/columnshard/columnshard__read.cpp b/ydb/core/tx/columnshard/columnshard__read.cpp index b888d503b1e..7f1c58a0890 100644 --- a/ydb/core/tx/columnshard/columnshard__read.cpp +++ b/ydb/core/tx/columnshard/columnshard__read.cpp @@ -47,7 +47,7 @@ private: }; -bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& ctx) { +bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { Y_VERIFY(Ev); Y_VERIFY(Self->TablesManager.HasPrimaryIndex()); Y_UNUSED(txc); @@ -98,7 +98,6 @@ bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& ctx) { ui32 status = NKikimrTxColumnShard::EResultStatus::ERROR; if (metadata) { - Self->MapExternBlobs(ctx, *metadata); ReadMetadata = metadata; status = NKikimrTxColumnShard::EResultStatus::SUCCESS; } @@ -131,7 +130,7 @@ void TTxRead::Complete(const TActorContext& ctx) { LOG_S_DEBUG(TxPrefix() << "complete" << TxSuffix() << " Metadata: " << *ReadMetadata); const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest( - std::static_pointer_cast<const NOlap::TReadMetadataBase>(ReadMetadata), *Self->BlobManager); + std::static_pointer_cast<const NOlap::TReadMetadataBase>(ReadMetadata)); auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); Self->IncCounter(COUNTER_READ_INDEX_GRANULES, statsDelta.Granules); @@ -141,8 +140,8 @@ void TTxRead::Complete(const TActorContext& ctx) { Self->IncCounter(COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); TInstant deadline = TInstant::Max(); // TODO - ctx.Register(CreateReadActor(Self->TabletID(), Ev->Get()->GetSource(), - std::move(Result), ReadMetadata, deadline, Self->SelfId(), requestCookie, Self->ReadCounters)); + ctx.Register(CreateReadActor(Self->TabletID(), Self->GetBlobsReadActorId(), Ev->Get()->GetSource(), + Self->GetStoragesManager(), std::move(Result), ReadMetadata, deadline, Self->SelfId(), requestCookie, Self->ReadCounters)); } } diff --git a/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp b/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp deleted file mode 100644 index e5a04b48d25..00000000000 --- a/ydb/core/tx/columnshard/columnshard__read_blob_ranges.cpp +++ /dev/null @@ -1,196 +0,0 @@ -#include "columnshard_impl.h" -#include "columnshard_private_events.h" -#include "columnshard_schema.h" -#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> - -namespace NKikimr::NColumnShard { - -using namespace NTabletFlatExecutor; - -class TTxReadBlobRanges : public TTransactionBase<TColumnShard> { -public: - TTxReadBlobRanges(TColumnShard* self, TEvColumnShard::TEvReadBlobRanges::TPtr& ev) - : TTransactionBase<TColumnShard>(self) - , Ev(ev) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_READ_BLOB_RANGES; } - -private: - TEvColumnShard::TEvReadBlobRanges::TPtr Ev; - std::unique_ptr<TEvColumnShard::TEvReadBlobRangesResult> Result; -}; - - -// Returns false in case of page fault -bool TryReadValue(NIceDb::TNiceDb& db, const TString& key, TString& value, ui32& readStatus) { - auto rowset = db.Table<Schema::SmallBlobs>().Key(key).Select<Schema::SmallBlobs::Data>(); - if (!rowset.IsReady()) { - return false; - } - - if (rowset.IsValid()) { - readStatus = NKikimrProto::EReplyStatus::OK; - value = rowset.GetValue<Schema::SmallBlobs::Data>(); - } else { - readStatus = NKikimrProto::EReplyStatus::NODATA; - value.clear(); - } - return true; -} - -bool TTxReadBlobRanges::Execute(TTransactionContext& txc, const TActorContext& ctx) { - Y_VERIFY(Ev); - auto& record = Ev->Get()->Record; - LOG_S_DEBUG("TTxReadBlobRanges.Execute at tablet " << Self->TabletID()<< " : " << record); - - Result = std::make_unique<TEvColumnShard::TEvReadBlobRangesResult>(Self->TabletID()); - - NIceDb::TNiceDb db(txc.DB); - - ui64 successCount = 0; - ui64 errorCount = 0; - ui64 byteCount = 0; - for (const auto& range : record.GetBlobRanges()) { - auto blobId = range.GetBlobId(); - - TString blob; - ui32 status = NKikimrProto::EReplyStatus::NODATA; - if (!TryReadValue(db, blobId, blob, status)) { - return false; // Page fault - } - - if (status == NKikimrProto::EReplyStatus::NODATA) { - // If the value wasn't found by string key then try to parse the key as small blob id - // and try lo lookup by this id serialized in the old format and in the new format - TString error; - NOlap::TUnifiedBlobId smallBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobId, nullptr, error); - - if (smallBlobId.IsValid()) { - if (!TryReadValue(db, smallBlobId.ToStringNew(), blob, status)) { - return false; // Page fault - } - - if (status == NKikimrProto::EReplyStatus::NODATA && - !TryReadValue(db, smallBlobId.ToStringLegacy(), blob, status)) - { - return false; // Page fault - } - } - } - - auto* res = Result->Record.AddResults(); - res->MutableBlobRange()->CopyFrom(range); - if (status == NKikimrProto::EReplyStatus::OK) { - if (range.GetOffset() + range.GetSize() <= blob.size()) { - res->SetData(blob.substr(range.GetOffset(), range.GetSize())); - byteCount += range.GetSize(); - } else { - LOG_S_NOTICE("TTxReadBlobRanges.Execute at tablet " << Self->TabletID() - << " the requested range " << range << " is outside blob data, blob size << " << blob.size()); - status = NKikimrProto::EReplyStatus::ERROR; - } - } - res->SetStatus(status); - if (status == NKikimrProto::EReplyStatus::OK) { - ++successCount; - } else { - ++errorCount; - } - } - - // Sending result right away without waiting for Complete() - // It is ok because the blob ids that were requested can only be known - // to the caller if they have been already committed. - ctx.Send(Ev->Sender, Result.release(), 0, Ev->Cookie); - - Self->IncCounter(COUNTER_SMALL_BLOB_READ_SUCCESS, successCount); - Self->IncCounter(COUNTER_SMALL_BLOB_READ_ERROR, errorCount); - Self->IncCounter(COUNTER_SMALL_BLOB_READ_BYTES, byteCount); - - return true; -} - -void TTxReadBlobRanges::Complete(const TActorContext& ctx) { - Y_UNUSED(ctx); - LOG_S_DEBUG("TTxReadBlobRanges.Complete at tablet " << Self->TabletID()); -} - -static std::unique_ptr<TEvColumnShard::TEvReadBlobRangesResult> -MakeErrorResponse(const TEvColumnShard::TEvReadBlobRanges& msg, ui64 tabletId, ui32 status) { - auto result = std::make_unique<TEvColumnShard::TEvReadBlobRangesResult>(tabletId); - for (const auto& range : msg.Record.GetBlobRanges()) { - auto* res = result->Record.AddResults(); - res->MutableBlobRange()->CopyFrom(range); - res->SetStatus(status); - } - return result; -} - -void TColumnShard::Handle(TEvColumnShard::TEvReadBlobRanges::TPtr& ev, const TActorContext& ctx) { - auto& msg = *ev->Get(); - - LOG_S_DEBUG("Read blob ranges at tablet " << TabletID() << msg.Record); - - if (msg.BlobRanges.empty()) { - TBlobGroupSelector dsGroupSelector(Info()); - TString errString; - msg.RestoreFromProto(&dsGroupSelector, errString); - Y_VERIFY_S(errString.empty(), errString); - } - - std::optional<TUnifiedBlobId> evictedBlobId; - bool isSmall = false; - bool isFallback = false; - bool isOther = false; - for (const auto& range : msg.BlobRanges) { - auto& blobId = range.BlobId; - if (blobId.IsSmallBlob()) { - isSmall = true; - } else if (blobId.IsDsBlob()) { - isFallback = true; - if (evictedBlobId) { - // Can read only one blobId at a time (but multiple ranges from it) - Y_VERIFY(evictedBlobId == blobId); - } else { - evictedBlobId = blobId; - } - } else { - isOther = true; - } - } - - Y_VERIFY(isSmall != isFallback && !isOther); - - if (isSmall) { - Execute(new TTxReadBlobRanges(this, ev), ctx); - } else if (isFallback) { - Y_VERIFY(evictedBlobId->IsValid()); - - NKikimrTxColumnShard::TEvictMetadata meta; - auto evicted = BlobManager->GetEvicted(*evictedBlobId, meta); - - if (!evicted.Blob.IsValid()) { - evicted = BlobManager->GetDropped(*evictedBlobId, meta); - } - - if (!evicted.Blob.IsValid() || !evicted.ExternBlob.IsValid()) { - LOG_S_NOTICE("No data for blobId " << *evictedBlobId << " at tablet " << TabletID()); - auto result = MakeErrorResponse(msg, TabletID(), NKikimrProto::EReplyStatus::NODATA); - ctx.Send(ev->Sender, result.release(), 0, ev->Cookie); - return; - } - - TString tierName = meta.GetTierName(); - Y_VERIFY_S(!tierName.empty(), evicted.ToString()); - - if (!GetExportedBlob(ctx, ev->Sender, ev->Cookie, tierName, std::move(evicted), std::move(msg.BlobRanges))) { - auto result = MakeErrorResponse(msg, TabletID(), NKikimrProto::EReplyStatus::ERROR); - ctx.Send(ev->Sender, result.release(), 0, ev->Cookie); - } - } -} - -} diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index 337b52ce988..2a918bcc4a3 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -1,4 +1,5 @@ #include "engines/reader/read_context.h" +#include "blobs_reader/events.h" #include <ydb/core/tx/columnshard/columnshard__scan.h> #include <ydb/core/tx/columnshard/columnshard__index_scan.h> @@ -53,34 +54,24 @@ constexpr i64 DEFAULT_READ_AHEAD_BYTES = (i64)2 * 1024 * 1024 * 1024; constexpr TDuration SCAN_HARD_TIMEOUT = TDuration::Minutes(10); constexpr TDuration SCAN_HARD_TIMEOUT_GAP = TDuration::Seconds(5); -class TLocalDataTasksProcessor: public IDataTasksProcessor { -private: - const TActorIdentity OwnerActorId; -protected: - virtual bool DoAdd(IDataTasksProcessor::ITask::TPtr task) override { - OwnerActorId.Send(NConveyor::TScanServiceOperator::MakeServiceId(OwnerActorId.NodeId()), new NConveyor::TEvExecution::TEvNewTask(task)); - return true; - } -public: - TLocalDataTasksProcessor(const TActorIdentity& ownerActorId) - : OwnerActorId(ownerActorId) { - } -}; - class TColumnShardScan : public TActorBootstrapped<TColumnShardScan>, NArrow::IRowWriter { private: std::shared_ptr<NOlap::TActorBasedMemoryAccesor> MemoryAccessor; + const std::shared_ptr<NOlap::IStoragesManager> StoragesManager; public: static constexpr auto ActorActivityType() { return NKikimrServices::TActivity::KQP_OLAP_SCAN; } public: - TColumnShardScan(const TActorId& columnShardActorId, const TActorId& scanComputeActorId, + TColumnShardScan(const TActorId& readBlobsActorId, const TActorId& columnShardActorId, const TActorId& scanComputeActorId, + const std::shared_ptr<NOlap::IStoragesManager>& storagesManager, ui32 scanId, ui64 txId, ui32 scanGen, ui64 requestCookie, ui64 tabletId, TDuration timeout, std::vector<TTxScan::TReadMetadataPtr>&& readMetadataList, NKikimrTxDataShard::EScanDataFormat dataFormat, const TScanCounters& scanCountersPool) - : ColumnShardActorId(columnShardActorId) + : StoragesManager(storagesManager) + , ColumnShardActorId(columnShardActorId) + , ReadBlobsActorId(readBlobsActorId) , ScanComputeActorId(scanComputeActorId) , BlobCacheActorId(NBlobCache::MakeBlobCacheServiceId()) , ScanId(scanId) @@ -95,7 +86,6 @@ public: , ScanCountersPool(scanCountersPool) , Stats(ScanCountersPool) { - NoTasksStartInstant = Now(); KeyYqlSchema = ReadMetadataRanges[ReadMetadataIndex]->GetKeyYqlSchema(); } @@ -109,7 +99,7 @@ public: Y_VERIFY(!ScanIterator); MemoryAccessor = std::make_shared<NOlap::TActorBasedMemoryAccesor>(SelfId(), "CSScan/Result"); - NOlap::TReadContext context(MakeTasksProcessor(), ScanCountersPool, MemoryAccessor, false); + NOlap::TReadContext context(StoragesManager, ScanCountersPool, MemoryAccessor, false); ScanIterator = ReadMetadataRanges[ReadMetadataIndex]->StartScan(context); // propagate self actor id // TODO: FlagSubscribeOnSession ? @@ -120,14 +110,6 @@ public: } private: - IDataTasksProcessor::TPtr MakeTasksProcessor() const { - if (NConveyor::TScanServiceOperator::IsEnabled()) { - return std::make_shared<TLocalDataTasksProcessor>(SelfId()); - } else { - return nullptr; - } - } - STATEFN(StateScan) { auto g = Stats.MakeGuard("processing"); TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) @@ -135,7 +117,6 @@ private: ); switch (ev->GetTypeRewrite()) { hFunc(TEvKqpCompute::TEvScanDataAck, HandleScan); - hFunc(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult, HandleScan); hFunc(TEvKqp::TEvAbortExecution, HandleScan); hFunc(TEvents::TEvUndelivered, HandleScan); hFunc(TEvents::TEvWakeup, HandleScan); @@ -146,43 +127,20 @@ private: } bool ReadNextBlob() { - THashMap<TUnifiedBlobId, std::vector<NBlobCache::TBlobRange>> ranges; - while (InFlightGuard.CanTake()) { - auto blobRange = ScanIterator->GetNextBlobToRead(); - if (!blobRange) { + while (true) { + std::shared_ptr<NOlap::NBlobOperations::NRead::ITask> task = ScanIterator->GetNextTaskToRead(); + if (!task) { break; } - Y_VERIFY(blobRange->BlobId.IsValid()); - InFlightGuard.Take(blobRange->Size); ++InFlightReads; - ranges[blobRange->BlobId].emplace_back(*blobRange); - } - if (!InFlightGuard.CanTake()) { - ScanCountersPool.OnReadingOverloaded(); - } - if (!ranges.size()) { - return true; - } - auto& externBlobs = ReadMetadataRanges[ReadMetadataIndex]->ExternBlobs; - for (auto&& i : ranges) { - bool fallback = externBlobs && externBlobs->contains(i.first); - NBlobCache::TReadBlobRangeOptions readOpts{ - .CacheAfterRead = true, - .ForceFallback = fallback, - .IsBackgroud = false - }; - ui32 size = 0; - for (auto&& s : i.second) { - size += s.Size; - } - ACFL_DEBUG("event", "ReadNextBlob")("blob_id", i.first)("ranges_count", i.second.size())("size", size); - Stats.RequestSent(i.second); - Send(BlobCacheActorId, new NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch(std::move(i.second), std::move(readOpts))); + Stats.RequestSent(task->GetExpectedRanges()); + Send(ReadBlobsActorId, std::make_unique<NOlap::NBlobOperations::NRead::TEvStartReadTask>(task)); } return true; } void HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev) { + --InFlightReads; auto g = Stats.MakeGuard("task_result"); if (ev->Get()->GetErrorMessage()) { ACFL_DEBUG("event", "TEvTaskProcessedResult")("error", ev->Get()->GetErrorMessage()); @@ -195,9 +153,6 @@ private: if (!ScanIterator->Finished()) { ScanIterator->Apply(t); } - if (!ScanIterator->HasWaitingTasks() && !NoTasksStartInstant) { - NoTasksStartInstant = Now(); - } } ContinueProcessing(); } @@ -222,42 +177,6 @@ private: ContinueProcessing(); } - void HandleScan(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev) { - auto g = Stats.MakeGuard("blob"); - ACFL_DEBUG("event", "TEvReadBlobRangeResult"); - --InFlightReads; - auto& event = *ev->Get(); - const auto& blobRange = event.BlobRange; - InFlightGuard.Return(blobRange.Size); - ScanCountersPool.OnBlobReceived(blobRange.Size); - Stats.BlobReceived(blobRange, event.FromCache, event.ConstructTime); - - if (event.Status != NKikimrProto::EReplyStatus::OK) { - TString strStatus = NKikimrProto::EReplyStatus_Name(event.Status); - ACFL_WARN("event", "TEvReadBlobRangeResult")("error", strStatus)("blob", ev->Get()->BlobRange); - SendScanError(strStatus); - return Finish(); - } - - Y_VERIFY(event.Data.size() == blobRange.Size, - "Read %s, size %" PRISZT, event.BlobRange.ToString().c_str(), event.Data.size()); - - ACFL_TRACE("event", "TEvReadBlobRangeResult")("blob", ev->Get()->BlobRange)("chunks_limiter", ChunksLimiter.DebugString()); - - if (ScanIterator) { - { - auto g = Stats.MakeGuard("AddData"); - ScanIterator->AddData(blobRange, event.Data); - if (ScanIterator->HasWaitingTasks() && NoTasksStartInstant) { - ScanCountersPool.OnBlobsWaitDuration(Now() - *NoTasksStartInstant, Stats.GetScanDuration()); - Stats.OnBlobsWaitDuration(Now() - *NoTasksStartInstant); - NoTasksStartInstant.reset(); - } - } - ContinueProcessing(); - } - } - // Returns true if it was able to produce new batch bool ProduceResults() noexcept { auto g = Stats.MakeGuard("ProduceResults"); @@ -366,7 +285,7 @@ private: // * we have finished scanning ALL the ranges // * or there is an in-flight blob read or ScanData message for which // we will get a reply and will be able to proceed further - if (!ScanIterator || !ChunksLimiter.HasMore() || InFlightReads != 0 || ScanIterator->HasWaitingTasks() + if (!ScanIterator || !ChunksLimiter.HasMore() || InFlightReads || MemoryAccessor->InWaiting()) { return; } @@ -448,7 +367,7 @@ private: return Finish(); } - NOlap::TReadContext context(MakeTasksProcessor(), ScanCountersPool, MemoryAccessor, false); + NOlap::TReadContext context(StoragesManager, ScanCountersPool, MemoryAccessor, false); ScanIterator = ReadMetadataRanges[ReadMetadataIndex]->StartScan(context); // Used in TArrowToYdbConverter ResultYqlSchema.clear(); @@ -598,6 +517,7 @@ private: private: const TActorId ColumnShardActorId; + const TActorId ReadBlobsActorId; const TActorId ScanComputeActorId; std::optional<TMonotonic> AckReceivedInstant; TActorId ScanActorId; @@ -625,7 +545,6 @@ private: TChunksLimiter ChunksLimiter; THolder<TEvKqpCompute::TEvScanData> Result; i64 InFlightReads = 0; - TInFlightGuard InFlightGuard; bool Finished = false; class TBlobStats { @@ -772,7 +691,7 @@ private: return TGuard(sectionName, *this); } - void RequestSent(const std::vector<NBlobCache::TBlobRange>& ranges) { + void RequestSent(const THashSet<NBlobCache::TBlobRange>& ranges) { ++RequestsCount; const TInstant now = Now(); for (auto&& i : ranges) { @@ -800,7 +719,6 @@ private: }; TScanStats Stats; - std::optional<TInstant> NoTasksStartInstant = TInstant::Zero(); ui64 Rows = 0; ui64 Bytes = 0; ui32 PageFaults = 0; @@ -896,7 +814,7 @@ std::shared_ptr<NOlap::TReadMetadataBase> TTxScan::CreateReadMetadata(NOlap::TRe } -bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { +bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { Y_UNUSED(txc); auto& record = Ev->Get()->Record; @@ -949,9 +867,6 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { if (!record.RangesSize()) { auto range = CreateReadMetadata(read, isIndexStats, record.GetReverse(), itemsLimit); if (range) { - if (!isIndexStats) { - Self->MapExternBlobs(ctx, static_cast<NOlap::TReadMetadata&>(*range)); - } ReadMetadataRanges = {range}; } return true; @@ -975,9 +890,6 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { ReadMetadataRanges.clear(); return true; } - if (!isIndexStats) { - Self->MapExternBlobs(ctx, static_cast<NOlap::TReadMetadata&>(*newRange)); - } ReadMetadataRanges.emplace_back(newRange); } Y_VERIFY(ReadMetadataRanges.size() == 1); @@ -1043,7 +955,7 @@ void TTxScan::Complete(const TActorContext& ctx) { return; } - ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRanges, *Self->BlobManager); + ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRanges); auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); Self->IncCounter(COUNTER_READ_INDEX_GRANULES, statsDelta.Granules); @@ -1052,7 +964,7 @@ void TTxScan::Complete(const TActorContext& ctx) { Self->IncCounter(COUNTER_READ_INDEX_ROWS, statsDelta.Rows); Self->IncCounter(COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, + auto scanActor = ctx.Register(new TColumnShardScan(Self->GetBlobsReadActorId(), Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), scanId, txId, scanGen, requestCookie, Self->TabletID(), timeout, std::move(ReadMetadataRanges), dataFormat, Self->ScanCounters)); LOG_S_DEBUG("TTxScan starting " << scanActor diff --git a/ydb/core/tx/columnshard/columnshard__scan.h b/ydb/core/tx/columnshard/columnshard__scan.h index ca72f0597cc..b3fe44d1518 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.h +++ b/ydb/core/tx/columnshard/columnshard__scan.h @@ -1,6 +1,7 @@ #pragma once #include "blob_cache.h" +#include "blobs_reader/task.h" #include "engines/reader/conveyor_task.h" #include "resources/memory.h" #include <ydb/core/formats/arrow/size_calcer.h> @@ -146,11 +147,9 @@ public: virtual std::optional<ui32> GetAvailableResultsCount() const { return {}; } - virtual void AddData(const NBlobCache::TBlobRange& /*blobRange*/, TString /*data*/) {} - virtual bool HasWaitingTasks() const = 0; virtual bool Finished() const = 0; virtual std::optional<NOlap::TPartialReadResult> GetBatch() = 0; - virtual std::optional<NBlobCache::TBlobRange> GetNextBlobToRead() { return {}; } + virtual std::shared_ptr<NOlap::NBlobOperations::NRead::ITask> GetNextTaskToRead() { return nullptr; } virtual TString DebugString() const { return "NO_DATA"; } diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.h b/ydb/core/tx/columnshard/columnshard__stats_scan.h index a334dff2a15..176dbece7ff 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.h +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.h @@ -45,10 +45,6 @@ public: { } - virtual bool HasWaitingTasks() const override { - return false; - } - bool Finished() const override { return IndexStats.empty(); } diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index 0e7b6e3b0b4..5f5068768c3 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -1,175 +1,16 @@ #include "columnshard_impl.h" -#include "columnshard_schema.h" -#include "blob_cache.h" -#include "blobs_action/bs.h" +#include "blobs_action/transaction/tx_write.h" +#include "blobs_action/transaction/tx_draft.h" #include "operations/slice_builder.h" +#include "operations/write_data.h" #include <ydb/core/tx/conveyor/usage/service.h> -#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> -#include <ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h> -#include <ydb/core/tx/columnshard/operations/write.h> -#include <ydb/core/tx/columnshard/operations/write_data.h> +#include <ydb/core/tx/ev_write/events.h> namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -class TTxWriteDraft: public TTransactionBase<TColumnShard> { -private: - const IWriteController::TPtr WriteController; -public: - TTxWriteDraft(TColumnShard* self, const IWriteController::TPtr writeController) - : TBase(self) - , WriteController(writeController) { - } - - bool Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) override { - TBlobManagerDb blobManagerDb(txc.DB); - for (auto&& action : WriteController->GetBlobActions()) { - action->OnExecuteTxBeforeWrite(*Self, blobManagerDb); - } - return true; - } - void Complete(const TActorContext& ctx) override { - for (auto&& action : WriteController->GetBlobActions()) { - action->OnCompleteTxBeforeWrite(*Self); - } - ctx.Register(NColumnShard::CreateWriteActor(Self->TabletID(), WriteController, TInstant::Max())); - } - TTxType GetTxType() const override { return TXTYPE_WRITE_DRAFT; } -}; - -class TTxWrite : public TTransactionBase<TColumnShard> { -public: - TTxWrite(TColumnShard* self, const TEvPrivate::TEvWriteBlobsResult::TPtr& putBlobResult) - : TBase(self) - , PutBlobResult(putBlobResult) - , TabletTxNo(++Self->TabletTxCounter) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_WRITE; } - -private: - TEvPrivate::TEvWriteBlobsResult::TPtr PutBlobResult; - const ui32 TabletTxNo; - std::unique_ptr<NActors::IEventBase> Result; - - bool InsertOneBlob(TTransactionContext& txc, const TEvPrivate::TEvWriteBlobsResult::TPutBlobData& blobData, const TWriteId writeId); - - TStringBuilder TxPrefix() const { - return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; - } - - TString TxSuffix() const { - return TStringBuilder() << " at tablet " << Self->TabletID(); - } -}; - -bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const TEvPrivate::TEvWriteBlobsResult::TPutBlobData& blobData, const TWriteId writeId) { - const NKikimrTxColumnShard::TLogicalMetadata& meta = blobData.GetLogicalMeta(); - - const auto& blobRange = blobData.GetBlobRange(); - Y_VERIFY(blobRange.GetBlobId().IsValid()); - - ui64 writeUnixTime = meta.GetDirtyWriteTimeSeconds(); - TInstant time = TInstant::Seconds(writeUnixTime); - - // First write wins - TBlobGroupSelector dsGroupSelector(Self->Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - - const auto& writeMeta(PutBlobResult->Get()->GetWriteMeta()); - - auto tableSchema = Self->TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchemaUnsafe(PutBlobResult->Get()->GetSchemaVersion()); - - NOlap::TInsertedData insertData((ui64)writeId, writeMeta.GetTableId(), writeMeta.GetDedupId(), blobRange, meta, tableSchema->GetSnapshot()); - bool ok = Self->InsertTable->Insert(dbTable, std::move(insertData)); - if (ok) { - THashSet<TWriteId> writesToAbort = Self->InsertTable->OldWritesToAbort(time); - NIceDb::TNiceDb db(txc.DB); - Self->TryAbortWrites(db, dbTable, std::move(writesToAbort)); - - // TODO: It leads to write+erase for aborted rows. Abort() inserts rows, EraseAborted() erases them. - // It's not optimal but correct. - TBlobManagerDb blobManagerDb(txc.DB); - auto allAborted = Self->InsertTable->GetAborted(); // copy (src is modified in cycle) - for (auto& [abortedWriteId, abortedData] : allAborted) { - Self->InsertTable->EraseAborted(dbTable, abortedData); - Y_VERIFY(blobRange.IsFullBlob()); - Self->BlobManager->DeleteBlob(abortedData.GetBlobRange().GetBlobId(), blobManagerDb); - } - - // Put new data into blob cache - Y_VERIFY(blobRange.IsFullBlob()); - - Self->UpdateInsertTableCounters(); - return true; - } - return false; -} - - -bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { - LOG_S_DEBUG(TxPrefix() << "execute" << TxSuffix()); - - const auto& writeMeta(PutBlobResult->Get()->GetWriteMeta()); - Y_VERIFY(Self->TablesManager.IsReadyForWrite(writeMeta.GetTableId())); - - txc.DB.NoMoreReadsForTx(); - TWriteOperation::TPtr operation; - if (writeMeta.HasLongTxId()) { - Y_VERIFY_S(PutBlobResult->Get()->GetBlobData().size() == 1, TStringBuilder() << "Blobs count: " << PutBlobResult->Get()->GetBlobData().size()); - } else { - operation = Self->OperationsManager.GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_VERIFY(operation); - Y_VERIFY(operation->GetStatus() == EOperationStatus::Started); - } - - TVector<TWriteId> writeIds; - for (auto blobData : PutBlobResult->Get()->GetBlobData()) { - auto writeId = TWriteId(writeMeta.GetWriteId()); - if (operation) { - writeId = Self->BuildNextWriteId(txc); - } else { - NIceDb::TNiceDb db(txc.DB); - writeId = Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId()); - } - - if (!InsertOneBlob(txc, blobData, writeId)) { - LOG_S_DEBUG(TxPrefix() << "duplicate writeId " << (ui64)writeId << TxSuffix()); - Self->IncCounter(COUNTER_WRITE_DUPLICATE); - } - writeIds.push_back(writeId); - } - - TBlobManagerDb blobManagerDb(txc.DB); - for (auto&& i : PutBlobResult->Get()->GetActions()) { - i->OnExecuteTxAfterWrite(*Self, blobManagerDb); - } - - if (operation) { - operation->OnWriteFinish(txc, writeIds); - auto txInfo = Self->ProgressTxController.RegisterTxWithDeadline(operation->GetTxId(), NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, "", writeMeta.GetSource(), 0, txc); - Y_UNUSED(txInfo); - NEvents::TDataEvents::TCoordinatorInfo tInfo = Self->ProgressTxController.GetCoordinatorInfo(operation->GetTxId()); - Result = NEvents::TDataEvents::TEvWriteResult::BuildPrepared(operation->GetTxId(), tInfo); - } else { - Y_VERIFY(writeIds.size() == 1); - Result = std::make_unique<TEvColumnShard::TEvWriteResult>(Self->TabletID(), writeMeta, (ui64)writeIds.front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); - } - return true; -} - -void TTxWrite::Complete(const TActorContext& ctx) { - Y_VERIFY(Result); - LOG_S_DEBUG(TxPrefix() << "complete" << TxSuffix()); - Self->CSCounters.OnWriteTxComplete((TMonotonic::Now() - PutBlobResult->Get()->GetWriteMeta().GetWriteStartInstant()).MilliSeconds()); - Self->CSCounters.OnSuccessWriteResponse(); - ctx.Send(PutBlobResult->Get()->GetWriteMeta().GetSource(), Result.release()); -} - void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, std::unique_ptr<NActors::IEventBase>&& event, const TActorContext& ctx) { IncCounter(COUNTER_WRITE_FAIL); switch (overloadReason) { @@ -321,7 +162,8 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex << WritesMonitor.DebugString() << " at tablet " << TabletID()); - std::shared_ptr<NConveyor::ITask> task = std::make_shared<NOlap::TBuildSlicesTask>(TabletID(), SelfId(), std::make_shared<NOlap::TBSWriteAction>(*BlobManager), writeData); + std::shared_ptr<NConveyor::ITask> task = std::make_shared<NOlap::TBuildSlicesTask>(TabletID(), SelfId(), + StoragesManager->GetInsertOperator()->StartWritingAction(), writeData); NConveyor::TCompServiceOperator::SendTaskToExecute(task); } } diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index bcc4b105923..833aad178c1 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -1,121 +1,15 @@ #include "columnshard_impl.h" + +#include "blobs_action/transaction/tx_draft.h" +#include "blobs_action/transaction/tx_write_index.h" #include "columnshard_private_events.h" -#include "columnshard_schema.h" -#include "blob_cache.h" -#include "blobs_action/bs.h" +#include "engines/changes/abstract/abstract.h" +#include "engines/writer/compacted_blob_constructor.h" -#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> -#include <ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h> #include <library/cpp/actors/core/log.h> namespace NKikimr::NColumnShard { -using namespace NTabletFlatExecutor; - -/// Common transaction for WriteIndex and GranuleCompaction. -/// For WriteIndex it writes new portion from InsertTable into index. -/// For GranuleCompaction it writes new portion of indexed data and mark old data with "switching" snapshot. -class TTxWriteIndex : public TTransactionBase<TColumnShard> { -public: - TTxWriteIndex(TColumnShard* self, TEvPrivate::TEvWriteIndex::TPtr& ev) - : TBase(self) - , Ev(ev) - , TabletTxNo(++Self->TabletTxCounter) - { - Y_VERIFY(Ev && Ev->Get()->IndexChanges); - } - - ~TTxWriteIndex() { - if (Ev) { - auto changes = Ev->Get()->IndexChanges; - if (!CompleteReady && changes) { - changes->AbortEmergency(); - } - } - } - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_WRITE_INDEX; } - -private: - - TEvPrivate::TEvWriteIndex::TPtr Ev; - const ui32 TabletTxNo; - TBackgroundActivity TriggerActivity = TBackgroundActivity::All(); - bool CompleteReady = false; - - TStringBuilder TxPrefix() const { - return TStringBuilder() << "TxWriteIndex[" << ToString(TabletTxNo) << "] "; - } - - TString TxSuffix() const { - return TStringBuilder() << " at tablet " << Self->TabletID(); - } -}; - - -bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())); - Y_VERIFY(Self->InsertTable); - Y_VERIFY(Self->TablesManager.HasPrimaryIndex()); - - txc.DB.NoMoreReadsForTx(); - - auto changes = Ev->Get()->IndexChanges; - LOG_S_DEBUG(TxPrefix() << "execute(" << changes->TypeString() << ") changes: " << *changes << TxSuffix()); - if (Ev->Get()->GetPutStatus() == NKikimrProto::OK) { - NOlap::TSnapshot snapshot(Self->LastPlannedStep, Self->LastPlannedTxId); - Y_VERIFY(Ev->Get()->IndexInfo.GetLastSchema()->GetSnapshot() <= snapshot); - - TBlobGroupSelector dsGroupSelector(Self->Info()); - NOlap::TDbWrapper dbWrap(txc.DB, &dsGroupSelector); - Y_VERIFY(Self->TablesManager.MutablePrimaryIndex().ApplyChanges(dbWrap, changes, snapshot)); - LOG_S_DEBUG(TxPrefix() << "(" << changes->TypeString() << ") apply" << TxSuffix()); - NOlap::TWriteIndexContext context(txc, dbWrap); - changes->WriteIndex(*Self, context); - - Ev->Get()->BlobsAction->OnExecuteTxAfterWrite(*Self, *context.BlobManagerDb); - - Self->UpdateIndexCounters(); - } else { - for (ui32 i = 0; i < changes->GetWritePortionsCount(); ++i) { - for (auto&& i : changes->GetWritePortionInfo(i)->GetPortionInfo().Records) { - LOG_S_WARN(TxPrefix() << "(" << changes->TypeString() << ":" << i.BlobRange << ") blob cannot apply changes: " << TxSuffix()); - } - } - NOlap::TChangesFinishContext context("cannot write index blobs"); - changes->Abort(*Self, context); - LOG_S_ERROR(TxPrefix() << " (" << changes->TypeString() << ") cannot write index blobs" << TxSuffix()); - } - - Self->EnqueueProgressTx(ctx); - return true; -} - -void TTxWriteIndex::Complete(const TActorContext& ctx) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())); - CompleteReady = true; - LOG_S_DEBUG(TxPrefix() << "complete" << TxSuffix()); - - const ui64 blobsWritten = Ev->Get()->BlobsAction->GetBlobsCount(); - const ui64 bytesWritten = Ev->Get()->BlobsAction->GetTotalSize(); - - if (!Ev->Get()->IndexChanges->IsAborted()) { - NOlap::TWriteIndexCompleteContext context(ctx, blobsWritten, bytesWritten, Ev->Get()->Duration, TriggerActivity); - Ev->Get()->IndexChanges->WriteIndexComplete(*Self, context); - } - - if (Ev->Get()->GetPutStatus() == NKikimrProto::TRYLATER) { - ctx.Schedule(Self->FailActivationDelay, new TEvPrivate::TEvPeriodicWakeup(true)); - } else { - Self->EnqueueBackgroundActivities(false, TriggerActivity); - } - - Self->UpdateResourceMetrics(ctx, Ev->Get()->PutResult->GetResourceUsage()); - Ev->Get()->BlobsAction->OnCompleteTxAfterWrite(*Self); -} - void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorContext& ctx) { auto putStatus = ev->Get()->GetPutStatus(); @@ -132,8 +26,13 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorConte ACFL_DEBUG("event", "TEvWriteIndex")("count", ev->Get()->IndexChanges->GetWritePortionsCount()); AFL_VERIFY(ev->Get()->IndexChanges->GetWritePortionsCount()); - auto writeController = std::make_shared<NOlap::TCompactedWriteController>(ctx.SelfID, ev->Release(), Settings.BlobWriteGrouppingEnabled); - ctx.Register(CreateWriteActor(TabletID(), writeController, TInstant::Max())); + const bool needDraftTransaction = ev->Get()->IndexChanges->GetBlobsAction().NeedDraftWritingTransaction(); + auto writeController = std::make_shared<NOlap::TCompactedWriteController>(ctx.SelfID, ev->Release()); + if (needDraftTransaction) { + Execute(new TTxWriteDraft(this, writeController)); + } else { + ctx.Register(CreateWriteActor(TabletID(), writeController, TInstant::Max())); + } } } else { if (putStatus == NKikimrProto::OK) { diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index 823992ef03f..1f450c58b25 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -2,8 +2,13 @@ #include "columnshard_schema.h" #include "blobs_reader/task.h" #include "blobs_reader/events.h" -#include "blobs_action/bs.h" #include "engines/changes/ttl.h" +#include "engines/changes/cleanup.h" +#include "blobs_action/bs/storage.h" +#include "blobs_action/tier/storage.h" +#include "blobs_action/transaction/tx_gc_insert_table.h" +#include "blobs_action/transaction/tx_gc_indexed.h" +#include "hooks/abstract/abstract.h" #include <ydb/core/scheme/scheme_types_proto.h> #include <ydb/core/tablet/tablet_counters_protobuf.h> #include <ydb/core/tx/tiering/external_data.h> @@ -130,11 +135,39 @@ bool TColumnShard::TAlterMeta::Validate(const NOlap::ISnapshotSchema::TPtr& sche return true; } +class TColumnShard::TStoragesManager: public NOlap::IStoragesManager { +private: + using TBase = NOlap::IStoragesManager; + TColumnShard* Shard; +protected: + virtual std::shared_ptr<NOlap::IBlobsStorageOperator> DoBuildOperator(const TString& storageId) override { + if (storageId == TBase::DefaultStorageId) { + return std::make_shared<NOlap::NBlobOperations::NBlobStorage::TOperator>(storageId, Shard->SelfId(), Shard->Info(), Shard->Executor()->Generation()); + } else if (!Shard->Tiers) { + return nullptr; + } else { + const NTiers::TManager* externalManager = Shard->Tiers->GetManagerOptional(storageId); + if (!externalManager) { + return nullptr; + } else { + return std::make_shared<NOlap::NBlobOperations::NTier::TOperator>(storageId, *Shard, externalManager->GetExternalStorageOperator()); + } + } + } +public: + TStoragesManager(TColumnShard* shard) + : Shard(shard) { + + } +}; TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) : TActor(&TThis::StateInit) , TTabletExecutedFlat(info, tablet, nullptr) , ProgressTxController(*this) + , StoragesManager(std::make_shared<TStoragesManager>(this)) + , InFlightReadsTracker(StoragesManager) + , TablesManager(StoragesManager) , PipeClientCache(NTabletPipe::CreateBoundedClientCache(new NTabletPipe::TBoundedClientCacheConfig(), GetPipeClientConfig())) , InsertTable(std::make_unique<NOlap::TInsertTable>()) , ReadCounters("Read") @@ -593,18 +626,6 @@ void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, } } -void TColumnShard::ScheduleNextGC(const TActorContext& ctx, bool cleanupOnly) { - LOG_S_DEBUG("Scheduling GC at tablet " << TabletID()); - - UpdateBlobMangerCounters(); - if (BlobManager->CanCollectGarbage(cleanupOnly)) { - BlobManager->GetCounters().StartCollection->Add(1); - Execute(CreateTxRunGc(), ctx); - } else { - BlobManager->GetCounters().SkipCollection->Add(1); - } -} - void TColumnShard::EnqueueBackgroundActivities(bool periodic, TBackgroundActivity activity) { TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())); ACFL_DEBUG("event", "EnqueueBackgroundActivities")("periodic", periodic)("activity", activity.DebugString()); @@ -617,7 +638,6 @@ void TColumnShard::EnqueueBackgroundActivities(bool periodic, TBackgroundActivit return; } - const TActorContext& ctx = ActorContext(); if (activity.HasIndexation()) { SetupIndexation(); } @@ -627,17 +647,16 @@ void TColumnShard::EnqueueBackgroundActivities(bool periodic, TBackgroundActivit } if (activity.HasCleanup()) { - if (auto event = SetupCleanup()) { - ctx.Send(SelfId(), event.release()); - } else if (periodic) { - // Small cleanup (no index changes) - CleanForgottenBlobs(ctx); - } + SetupCleanup(); } if (activity.HasTtl()) { SetupTtl(); } + + SetupGC(); + + SetupCleanupInsertTable(); } class TChangesTask: public NConveyor::ITask { @@ -697,7 +716,7 @@ protected: } public: TChangesReadTask(std::unique_ptr<TEvPrivate::TEvWriteIndex>&& event, const TActorId parentActorId, const ui64 tabletId, const TIndexationCounters& counters) - : TBase(event->IndexChanges->GetReadBlobRanges()) + : TBase(event->IndexChanges->GetReadingActions()) , ParentActorId(parentActorId) , TabletId(tabletId) , TxEvent(std::move(event)) @@ -772,7 +791,7 @@ void TColumnShard::SetupIndexation() { auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); indexChanges->Start(*this); auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), indexChanges, - Settings.CacheDataAfterIndexing, std::make_shared<NOlap::TBSWriteAction>(*BlobManager)); + Settings.CacheDataAfterIndexing); ActorContext().Send(BlobsReadActor, std::make_unique<NOlap::NBlobOperations::NRead::TEvStartReadTask>(std::make_unique<TChangesReadTask>(std::move(ev), SelfId(), TabletID(), IndexationCounters))); } @@ -794,7 +813,7 @@ void TColumnShard::SetupCompaction() { indexChanges->Start(*this); auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); - auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), indexChanges, Settings.CacheDataAfterCompaction, std::make_shared<NOlap::TBSWriteAction>(*BlobManager)); + auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), indexChanges, Settings.CacheDataAfterCompaction); ActorContext().Send(BlobsReadActor, std::make_unique<NOlap::NBlobOperations::NRead::TEvStartReadTask>(std::make_unique<TChangesReadTask>(std::move(ev), SelfId(), TabletID(), CompactionCounters))); } @@ -832,8 +851,8 @@ bool TColumnShard::SetupTtl(const THashMap<ui64, NOlap::TTiering>& pathTtls, con LOG_S_INFO("TTL" << (needWrites ? " with writes" : "" ) << " prepared at tablet " << TabletID()); indexChanges->Start(*this); - auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), indexChanges, false, std::make_shared<NOlap::TBSWriteAction>(*BlobManager)); - + auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), indexChanges, false); + NYDBTest::TControllers::GetColumnShardController()->OnWriteIndexStart(TabletID(), indexChanges->TypeString()); if (needWrites) { ActorContext().Send(BlobsReadActor, std::make_unique<NOlap::NBlobOperations::NRead::TEvStartReadTask>(std::make_unique<TChangesReadTask>(std::move(ev), SelfId(), TabletID(), CompactionCounters))); } else { @@ -843,11 +862,11 @@ bool TColumnShard::SetupTtl(const THashMap<ui64, NOlap::TTiering>& pathTtls, con return true; } -std::unique_ptr<TEvPrivate::TEvWriteIndex> TColumnShard::SetupCleanup() { +void TColumnShard::SetupCleanup() { CSCounters.OnSetupCleanup(); if (BackgroundController.IsCleanupActive()) { LOG_S_DEBUG("Cleanup already in progress at tablet " << TabletID()); - return {}; + return; } NOlap::TSnapshot cleanupSnapshot{GetMinReadStep(), 0}; @@ -856,209 +875,36 @@ std::unique_ptr<TEvPrivate::TEvWriteIndex> TColumnShard::SetupCleanup() { TablesManager.MutablePrimaryIndex().StartCleanup(cleanupSnapshot, TablesManager.MutablePathsToDrop(), TLimits::MAX_TX_RECORDS); if (!changes) { LOG_S_INFO("Cannot prepare cleanup at tablet " << TabletID()); - return {}; - } - - // Filter PortionsToDrop - std::vector<NOlap::TPortionInfo> portionsCanBedropped; - THashSet<ui64> excludedPortions; - for (const auto& portionInfo : changes->PortionsToDrop) { - const ui64 portionId = portionInfo.GetPortion(); - // Exclude portions that are used by in-flight reads/scans - if (!InFlightReadsTracker.IsPortionUsed(portionId)) { - portionsCanBedropped.push_back(portionInfo); - } else { - excludedPortions.insert(portionId); - } - } - changes->PortionsToDrop.swap(portionsCanBedropped); - - LOG_S_DEBUG("Prepare Cleanup snapshot: " << cleanupSnapshot - << " portions to drop: " << changes->PortionsToDrop.size() - << " in use by reads: " << excludedPortions.size() - << " at tablet " << TabletID()); - - if (changes->PortionsToDrop.empty()) { - return {}; + return; } auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); - auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), changes, false, std::make_shared<NOlap::TBSWriteAction>(*BlobManager)); + auto ev = std::make_unique<TEvPrivate::TEvWriteIndex>(std::move(actualIndexInfo), changes, false); ev->SetPutStatus(NKikimrProto::OK); // No new blobs to write changes->Start(*this); - return ev; -} - - -void TColumnShard::MapExternBlobs(const TActorContext& /*ctx*/, NOlap::TReadMetadata& metadata) { - if (!metadata.SelectInfo) { - return; - } - - if (!BlobManager->HasExternBlobs()) { - return; - } - - THashSet<TUnifiedBlobId> uniqBlobs; - for (auto& portion : metadata.SelectInfo->PortionsOrderedPK) { - for (auto& rec : portion->Records) { - uniqBlobs.insert(rec.BlobRange.BlobId); - } - } - - auto exported = std::make_shared<THashSet<TUnifiedBlobId>>(); - - for (auto& blobId : uniqBlobs) { - TEvictMetadata meta; - auto evicted = BlobManager->GetEvicted(blobId, meta); - if (evicted.IsExternal()) { - exported->insert(blobId); - } - } - - if (!exported->empty()) { - metadata.ExternBlobs = exported; - } -} -void TColumnShard::CleanForgottenBlobs(const TActorContext& ctx, const THashSet<TUnifiedBlobId>& allowList) { - THashMap<TString, THashSet<NOlap::TEvictedBlob>> tierBlobsToForget; - BlobManager->GetCleanupBlobs(tierBlobsToForget, allowList); - if (tierBlobsToForget.size()) { - ForgetBlobs(ctx, tierBlobsToForget); - } + Send(SelfId(), ev.release()); } -void TColumnShard::Reexport(const TActorContext& ctx) { - THashMap<TString, THashSet<NOlap::TEvictedBlob>> tierBlobsToReexport; - BlobManager->GetReexportBlobs(tierBlobsToReexport); - - ui64 exportNo = LastExportNo; - LastExportNo += tierBlobsToReexport.size(); // TODO: persist it? - - for (auto& [tierName, evictSet] : tierBlobsToReexport) { - ++exportNo; - LOG_S_INFO("Reexport " << exportNo << " at tablet " << TabletID()); - ExportBlobs(ctx, std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, evictSet)); +void TColumnShard::SetupGC() { + for (auto&& i : StoragesManager->GetStorages()) { + i.second->StartGC(); } } -void TColumnShard::ExportBlobs(const TActorContext& ctx, std::unique_ptr<TEvPrivate::TEvExport>&& event) { - Y_VERIFY(event); - Y_VERIFY(event->ExportNo); - Y_VERIFY(event->Blobs.size()); - Y_VERIFY(event->SrcToDstBlobs.size() == event->Blobs.size()); - - const auto& tierName = event->TierName; - if (auto s3 = GetS3ActorForTier(tierName)) { - TStringBuilder strBlobs; - ui64 sumBytes = 0; - for (auto& [blobId, _] : event->Blobs) { - strBlobs << "'" << blobId << "' "; - sumBytes += blobId.BlobSize(); - } - event->DstActor = s3; - IncCounter(COUNTER_EXPORTING_BLOBS, event->Blobs.size()); - IncCounter(COUNTER_EXPORTING_BYTES, sumBytes); - - LOG_S_NOTICE("Export blobs " << strBlobs << "(tier '" << tierName << "') at tablet " << TabletID()); - ctx.Register(CreateExportActor(TabletID(), SelfId(), event.release())); - } else { - LOG_S_INFO("Cannot export blobs (no S3 actor for tier '" << tierName << "') at tablet " << TabletID()); - } +void TColumnShard::Handle(TEvPrivate::TEvGarbageCollectionFinished::TPtr& ev, const TActorContext& ctx) { + Execute(new TTxGarbageCollectionFinished(this, ev->Get()->Action), ctx); } -// It should be called from ForgetBlobs() only to log all S3 activity -void TColumnShard::ForgetTierBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const { - if (auto s3 = GetS3ActorForTier(tierName)) { - auto forget = std::make_unique<TEvPrivate::TEvForget>(); - forget->Evicted = std::move(blobs); - - ui64 sumBytes = 0; - for (auto& blob : forget->Evicted) { - sumBytes += blob.Blob.BlobSize(); - } - IncCounter(COUNTER_FORGETTING_BLOBS, forget->Evicted.size()); - IncCounter(COUNTER_FORGETTING_BYTES, sumBytes); - - ctx.Send(s3, forget.release()); - } -} - -void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashMap<TString, THashSet<NOlap::TEvictedBlob>>& evictedBlobs) { - TStringBuilder strBlobs; - TStringBuilder strBlobsDelayed; - - for (const auto& [tierName, evictSet] : evictedBlobs) { - std::vector<NOlap::TEvictedBlob> tierBlobs; - - for (const auto& ev : evictSet) { - auto& blobId = ev.Blob; - if (BlobManager->BlobInUse(blobId)) { - LOG_S_DEBUG("Blob '" << blobId << "' is in use at tablet " << TabletID()); - strBlobsDelayed << "'" << blobId << "' "; - continue; - } - - TEvictMetadata meta; - auto evict = BlobManager->GetDropped(blobId, meta); - if (!evict.Blob.IsValid()) { - LOG_S_INFO("Forget forgotten blob '" << blobId << "' at tablet " << TabletID()); - continue; - } - if (tierName != meta.GetTierName()) { - LOG_S_ERROR("Forget blob '" << blobId << "' with unexpected tier name '" - << meta.GetTierName() << "' at tablet " << TabletID()); - continue; - } - - if (evict.State == EEvictState::UNKNOWN) { - LOG_S_ERROR("Forget unknown blob '" << blobId << "' at tablet " << TabletID()); - } else if (NOlap::CouldBeExported(evict.State)) { - Y_VERIFY(evict.Blob == blobId); - strBlobs << "'" << blobId << "' "; - tierBlobs.emplace_back(std::move(evict)); - } else { - Y_VERIFY(evict.Blob == blobId); - strBlobsDelayed << "'" << blobId << "' "; - } - } - - if (tierBlobs.size()) { - ForgetTierBlobs(ctx, tierName, std::move(tierBlobs)); - } - } +void TColumnShard::SetupCleanupInsertTable() { + auto writeIdsToCleanup = InsertTable->OldWritesToAbort(AppData()->TimeProvider->Now()); - if (strBlobs.size()) { - LOG_S_NOTICE("Forget blobs " << strBlobs << "at tablet " << TabletID()); - } - if (strBlobsDelayed.size()) { - LOG_S_NOTICE("Forget blobs (deleyed) " << strBlobsDelayed << "at tablet " << TabletID()); + if (!InsertTable->GetAborted().size() && !writeIdsToCleanup.size()) { + return; } -} - -bool TColumnShard::GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName, - NOlap::TEvictedBlob&& evicted, std::vector<NOlap::TBlobRange>&& ranges) { - if (auto s3 = GetS3ActorForTier(tierName)) { - auto get = std::make_unique<TEvPrivate::TEvGetExported>(); - get->DstActor = dst; - get->DstCookie = cookie; - get->Evicted = std::move(evicted); - get->BlobRanges = std::move(ranges); - - ui64 sumBytes = 0; - for (auto& blobRange : get->BlobRanges) { - sumBytes += blobRange.Size; - } - IncCounter(COUNTER_READING_EXPORTED_BLOBS); - IncCounter(COUNTER_READING_EXPORTED_RANGES, get->BlobRanges.size()); - IncCounter(COUNTER_READING_EXPORTED_BYTES, sumBytes); - ctx.Send(s3, get.release()); - return true; - } - return false; + Execute(new TTxInsertTableCleanup(this, std::move(writeIdsToCleanup)), TActorContext::AsActorContext()); } void TColumnShard::Die(const TActorContext& ctx) { @@ -1069,12 +915,14 @@ void TColumnShard::Die(const TActorContext& ctx) { return IActor::Die(ctx); } -TActorId TColumnShard::GetS3ActorForTier(const TString& tierId) const { +#ifndef KIKIMR_DISABLE_S3_OPS +NWrappers::NExternalStorage::IExternalStorageOperator::TPtr TColumnShard::GetTierStorageOperator(const TString& tierId) const { if (!Tiers) { - return {}; + return nullptr; } - return Tiers->GetStorageActorId(tierId); + return Tiers->GetStorageOperator(tierId); } +#endif void TColumnShard::Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev) { Y_VERIFY(Tiers); @@ -1084,18 +932,7 @@ void TColumnShard::Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& } void TColumnShard::ActivateTiering(const ui64 pathId, const TString& useTiering) { - if (!Tiers) { - Tiers = std::make_shared<TTiersManager>(TabletID(), SelfId(), - [this](const TActorContext& ctx){ - if (!TablesManager.HasPrimaryIndex()) { - return; - } - TablesManager.MutablePrimaryIndex().OnTieringModified(Tiers, TablesManager.GetTtl()); - CleanForgottenBlobs(ctx); - Reexport(ctx); - }); - Tiers->Start(Tiers); - } + Y_VERIFY(!!Tiers); if (!!Tiers) { if (useTiering) { Tiers->EnablePathId(pathId, useTiering); @@ -1103,6 +940,22 @@ void TColumnShard::ActivateTiering(const ui64 pathId, const TString& useTiering) Tiers->DisablePathId(pathId); } } + OnTieringModified(); +} + +void TColumnShard::Enqueue(STFUNC_SIG) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPrivate::TEvTieringModified, Handle); + default: + return NTabletFlatExecutor::TTabletExecutedFlat::Enqueue(ev); + } +} + +void TColumnShard::OnTieringModified() { + StoragesManager->OnTieringModified(Tiers); + if (TablesManager.HasPrimaryIndex()) { + TablesManager.MutablePrimaryIndex().OnTieringModified(Tiers, TablesManager.GetTtl()); + } } } diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index 39e3847eb13..2a0f1ca342b 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -30,7 +30,15 @@ class TTTLColumnEngineChanges; class TChangesWithAppend; class TCompactColumnEngineChanges; class TInsertColumnEngineChanges; -class TBSWriteAction; +namespace NBlobOperations { +namespace NBlobStorage { +class TWriteAction; +class TOperator; +} +namespace NTier { +class TOperator; +} +} namespace NCompaction { class TGeneralCompactColumnEngineChanges; } @@ -38,13 +46,14 @@ class TGeneralCompactColumnEngineChanges; namespace NKikimr::NColumnShard { +class TTxInsertTableCleanup; class TOperationsManager; extern bool gAllowLogBatchingDefaultValue; IActor* CreateWriteActor(ui64 tabletId, IWriteController::TPtr writeController, const TInstant& deadline); -IActor* CreateReadActor(ui64 tabletId, - const TActorId& dstActor, +IActor* CreateReadActor(ui64 tabletId, const NActors::TActorId readBlobsActor, + const TActorId& dstActor, const std::shared_ptr<NOlap::IStoragesManager>& storages, std::unique_ptr<TEvColumnShard::TEvReadResult>&& event, NOlap::TReadMetadata::TConstPtr readMetadata, const TInstant& deadline, @@ -61,7 +70,6 @@ struct TSettings { TControlWrapper BlobWriteGrouppingEnabled; TControlWrapper CacheDataAfterIndexing; TControlWrapper CacheDataAfterCompaction; - TControlWrapper MaxSmallBlobSize; static constexpr ui64 OverloadTxInFlight = 1000; static constexpr ui64 OverloadWritesInFlight = 1000; static constexpr ui64 OverloadWritesSizeInFlight = 128 * 1024 * 1024; @@ -70,14 +78,12 @@ struct TSettings { : BlobWriteGrouppingEnabled(1, 0, 1) , CacheDataAfterIndexing(1, 0, 1) , CacheDataAfterCompaction(1, 0, 1) - , MaxSmallBlobSize(0, 0, 8000000) {} void RegisterControls(TControlBoard& icb) { icb.RegisterSharedControl(BlobWriteGrouppingEnabled, "ColumnShardControls.BlobWriteGrouppingEnabled"); icb.RegisterSharedControl(CacheDataAfterIndexing, "ColumnShardControls.CacheDataAfterIndexing"); icb.RegisterSharedControl(CacheDataAfterCompaction, "ColumnShardControls.CacheDataAfterCompaction"); - icb.RegisterSharedControl(MaxSmallBlobSize, "ColumnShardControls.MaxSmallBlobSize"); } }; @@ -90,6 +96,7 @@ class TColumnShard : public TActor<TColumnShard> , public NTabletFlatExecutor::TTabletExecutedFlat { + friend class TTxInsertTableCleanup; friend class TTxInit; friend class TTxInitSchema; friend class TTxUpdateSchema; @@ -102,7 +109,6 @@ class TColumnShard friend class TTxScan; friend class TTxWriteIndex; friend class TTxExportFinish; - friend class TTxForget; friend class TTxRunGC; friend class TTxProcessGCResult; friend class TTxReadBlobRanges; @@ -113,8 +119,11 @@ class TColumnShard friend class NOlap::TInsertColumnEngineChanges; friend class NOlap::TColumnEngineChanges; friend class NOlap::NCompaction::TGeneralCompactColumnEngineChanges; - friend class NOlap::TBSWriteAction; + friend class NOlap::NBlobOperations::NBlobStorage::TWriteAction; + friend class NOlap::NBlobOperations::NBlobStorage::TOperator; + friend class NOlap::NBlobOperations::NTier::TOperator; + class TStoragesManager; friend class TTxController; friend class TOperationsManager; @@ -122,7 +131,6 @@ class TColumnShard class TTxProgressTx; class TTxProposeCancel; - // proto void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx); @@ -136,7 +144,6 @@ class TColumnShard void Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvRead::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvScan::TPtr& ev, const TActorContext& ctx); - void Handle(TEvColumnShard::TEvReadBlobRanges::TPtr& ev, const TActorContext& ctx); void Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, const TActorContext& ctx); void Handle(TEvMediatorTimecast::TEvNotifyPlanStep::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx); @@ -144,15 +151,13 @@ class TColumnShard void Handle(TEvPrivate::TEvReadFinished::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPrivate::TEvExport::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPrivate::TEvForget::TPtr& ev, const TActorContext& ctx); - void Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr& ev, const TActorContext& ctx); void Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev); void Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvWriteDraft::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvGarbageCollectionFinished::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvTieringModified::TPtr& ev, const TActorContext&); ITransaction* CreateTxInitSchema(); - ITransaction* CreateTxRunGc(); void OnActivateExecutor(const TActorContext& ctx) override; void OnDetach(const TActorContext& ctx) override; @@ -200,13 +205,13 @@ class TColumnShard } void ActivateTiering(const ui64 pathId, const TString& useTiering); - + void OnTieringModified(); public: enum class EOverloadStatus { Shard /* "shard" */, InsertTable /* "insert_table" */, Disk /* "disk" */, - None + None /* "none" */ }; private: @@ -233,10 +238,11 @@ protected: } STFUNC(StateWork) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())); + const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); TRACE_EVENT(NKikimrServices::TX_COLUMNSHARD); switch (ev->GetTypeRewrite()) { hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); HFunc(TEvTabletPipe::TEvServerConnected, Handle); @@ -249,19 +255,17 @@ protected: HFunc(TEvTxProcessing::TEvPlanStep, Handle); HFunc(TEvColumnShard::TEvWrite, Handle); HFunc(TEvColumnShard::TEvRead, Handle); - HFunc(TEvColumnShard::TEvReadBlobRanges, Handle); HFunc(TEvPrivate::TEvWriteBlobsResult, Handle); HFunc(TEvMediatorTimecast::TEvRegisterTabletResult, Handle); HFunc(TEvMediatorTimecast::TEvNotifyPlanStep, Handle); - HFunc(TEvBlobStorage::TEvCollectGarbageResult, Handle); HFunc(TEvPrivate::TEvWriteIndex, Handle); - HFunc(TEvPrivate::TEvExport, Handle); - HFunc(TEvPrivate::TEvForget, Handle); HFunc(TEvPrivate::TEvScanStats, Handle); HFunc(TEvPrivate::TEvReadFinished, Handle); HFunc(TEvPrivate::TEvPeriodicWakeup, Handle); HFunc(NEvents::TDataEvents::TEvWrite, Handle); HFunc(TEvPrivate::TEvWriteDraft, Handle); + HFunc(TEvPrivate::TEvGarbageCollectionFinished, Handle); + HFunc(TEvPrivate::TEvTieringModified, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { LOG_S_WARN("TColumnShard.StateWork at " << TabletID() @@ -301,8 +305,6 @@ private: ui64 PreparedTxId = 0; }; - TTablesManager TablesManager; - class TWritesMonitor { private: TColumnShard& Owner; @@ -389,6 +391,10 @@ private: TActorId BlobsReadActor; TActorId StatsReportPipe; + std::shared_ptr<NOlap::IStoragesManager> StoragesManager; + TInFlightReadsTracker InFlightReadsTracker; + TTablesManager TablesManager; + bool TiersInitializedFlag = false; std::shared_ptr<TTiersManager> Tiers; std::unique_ptr<TTabletCountersBase> TabletCountersPtr; TTabletCountersBase* TabletCounters; @@ -414,8 +420,6 @@ private: TMultiMap<TRowVersion, TEvColumnShard::TEvScan::TPtr> WaitingScans; ui32 ActiveEvictions = 0; TBackgroundController BackgroundController; - std::unique_ptr<TBlobManager> BlobManager; - TInFlightReadsTracker InFlightReadsTracker; TSettings Settings; TLimits Limits; TCompactionLimits CompactionLimits; @@ -444,7 +448,7 @@ private: void EnqueueProgressTx(const TActorContext& ctx); void EnqueueBackgroundActivities(bool periodic = false, TBackgroundActivity activity = TBackgroundActivity::All()); - void CleanForgottenBlobs(const TActorContext& ctx, const THashSet<TUnifiedBlobId>& allowList = {}); + virtual void Enqueue(STFUNC_SIG) override; void UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc); void ProtectSchemaSeqNo(const NKikimrTxColumnShard::TSchemaSeqNo& seqNoProto, NTabletFlatExecutor::TTransactionContext& txc); @@ -456,29 +460,31 @@ private: void RunDropTable(const NKikimrTxColumnShard::TDropTable& body, const TRowVersion& version, NTabletFlatExecutor::TTransactionContext& txc); void RunAlterStore(const NKikimrTxColumnShard::TAlterStore& body, const TRowVersion& version, NTabletFlatExecutor::TTransactionContext& txc); - void MapExternBlobs(const TActorContext& ctx, NOlap::TReadMetadata& metadata); - TActorId GetS3ActorForTier(const TString& tierId) const; - void Reexport(const TActorContext& ctx); - void ExportBlobs(const TActorContext& ctx, std::unique_ptr<TEvPrivate::TEvExport>&& ev); - void ForgetTierBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const; - void ForgetBlobs(const TActorContext& ctx, const THashMap<TString, THashSet<NOlap::TEvictedBlob>>& evictedBlobs); - bool GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName, - NOlap::TEvictedBlob&& evicted, std::vector<NOlap::TBlobRange>&& ranges); - - void ScheduleNextGC(const TActorContext& ctx, bool cleanupOnly = false); +#ifndef KIKIMR_DISABLE_S3_OPS + NWrappers::NExternalStorage::IExternalStorageOperator::TPtr GetTierStorageOperator(const TString& tierId) const; +#endif void SetupIndexation(); void SetupCompaction(); bool SetupTtl(const THashMap<ui64, NOlap::TTiering>& pathTtls = {}, const bool force = false); - std::unique_ptr<TEvPrivate::TEvWriteIndex> SetupCleanup(); + void SetupCleanup(); + void SetupCleanupInsertTable(); + void SetupGC(); - void UpdateBlobMangerCounters(); void UpdateInsertTableCounters(); void UpdateIndexCounters(); void UpdateResourceMetrics(const TActorContext& ctx, const TUsage& usage); ui64 MemoryUsage() const; void SendPeriodicStats(); public: + const TActorId& GetBlobsReadActorId() const { + return BlobsReadActor; + } + + const std::shared_ptr<NOlap::IStoragesManager>& GetStoragesManager() const { + return StoragesManager; + } + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::TX_COLUMNSHARD_ACTOR; } diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index b95bca26b29..ae63a10f1a0 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -1,9 +1,11 @@ #pragma once #include "blob_manager.h" +#include "blobs_action/abstract/gc.h" #include "defs.h" #include <ydb/core/protos/counters_columnshard.pb.h> +#include <ydb/core/tx/columnshard/engines/column_engine.h> #include <ydb/core/tx/columnshard/engines/writer/write_controller.h> #include <ydb/core/tx/ev_write/write_data.h> #include <ydb/core/formats/arrow/special_keys.h> @@ -25,16 +27,28 @@ struct TEvPrivate { EvWriteBlobsResult, EvStartReadTask, EvWriteDraft, + EvGarbageCollectionFinished, + EvTieringModified, EvEnd }; static_assert(EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); + struct TEvTieringModified: public TEventLocal<TEvTieringModified, EvTieringModified> { + }; + struct TEvWriteDraft: public TEventLocal<TEvWriteDraft, EvWriteDraft> { const std::shared_ptr<IWriteController> WriteController; TEvWriteDraft(std::shared_ptr<IWriteController> controller) - : WriteController(controller) - { + : WriteController(controller) { + + } + }; + + struct TEvGarbageCollectionFinished: public TEventLocal<TEvGarbageCollectionFinished, EvGarbageCollectionFinished> { + const std::shared_ptr<NOlap::IBlobsGCAction> Action; + TEvGarbageCollectionFinished(const std::shared_ptr<NOlap::IBlobsGCAction>& action) + : Action(action) { } }; @@ -48,15 +62,13 @@ struct TEvPrivate { bool CacheData{false}; TDuration Duration; TBlobPutResult::TPtr PutResult; - std::shared_ptr<NOlap::IBlobsAction> BlobsAction; TEvWriteIndex(NOlap::TVersionedIndex&& indexInfo, std::shared_ptr<NOlap::TColumnEngineChanges> indexChanges, - bool cacheData, std::shared_ptr<NOlap::IBlobsAction> action) + bool cacheData) : IndexInfo(std::move(indexInfo)) , IndexChanges(indexChanges) , CacheData(cacheData) - , BlobsAction(action) { PutResult = std::make_shared<TBlobPutResult>(NKikimrProto::UNKNOWN); } @@ -220,7 +232,7 @@ struct TEvPrivate { Y_VERIFY(PutResult); } - TEvWriteBlobsResult(const NColumnShard::TBlobPutResult::TPtr& putResult, TVector<TPutBlobData>&& blobData, const std::vector<std::shared_ptr<NOlap::IBlobsAction>>& actions, const NEvWrite::TWriteMeta& writeMeta, const ui64 schemaVersion) + TEvWriteBlobsResult(const NColumnShard::TBlobPutResult::TPtr& putResult, TVector<TPutBlobData>&& blobData, const std::vector<std::shared_ptr<NOlap::IBlobsWritingAction>>& actions, const NEvWrite::TWriteMeta& writeMeta, const ui64 schemaVersion) : TEvWriteBlobsResult(putResult, writeMeta) { Actions = actions; @@ -228,7 +240,7 @@ struct TEvPrivate { SchemaVersion = schemaVersion; } - const std::vector<std::shared_ptr<NOlap::IBlobsAction>>& GetActions() const { + const std::vector<std::shared_ptr<NOlap::IBlobsWritingAction>>& GetActions() const { return Actions; } @@ -255,7 +267,7 @@ struct TEvPrivate { private: NColumnShard::TBlobPutResult::TPtr PutResult; TVector<TPutBlobData> BlobData; - std::vector<std::shared_ptr<NOlap::IBlobsAction>> Actions; + std::vector<std::shared_ptr<NOlap::IBlobsWritingAction>> Actions; NEvWrite::TWriteMeta WriteMeta; ui64 SchemaVersion = 0; }; diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index e96920909e3..4910ce98117 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -8,6 +8,7 @@ #include <ydb/core/tx/columnshard/engines/insert_table/insert_table.h> #include <ydb/core/tx/columnshard/engines/granules_table.h> #include <ydb/core/tx/columnshard/engines/columns_table.h> +#include <ydb/core/tx/columnshard/engines/column_engine.h> #include <ydb/core/tx/columnshard/operations/write.h> #include <type_traits> @@ -42,6 +43,12 @@ struct Schema : NIceDb::Schema { OperationsTableId, }; + enum class ETierTables: ui32 { + TierBlobsDraft = 1024, + TierBlobsToKeep, + TierBlobsToDelete + }; + enum class EValueIds : ui32 { CurrentSchemeShardId = 1, ProcessingParams = 2, @@ -264,6 +271,22 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns<TxId, WriteId, Status, CreatedAt, GlobalWriteId, Metadata>; }; + struct TierBlobsDraft: NIceDb::Schema::Table<(ui32)ETierTables::TierBlobsDraft> { + struct StorageId: Column<1, NScheme::NTypeIds::String> {}; + struct BlobId: Column<2, NScheme::NTypeIds::String> {}; + + using TKey = TableKey<StorageId, BlobId>; + using TColumns = TableColumns<StorageId, BlobId>; + }; + + struct TierBlobsToDelete: NIceDb::Schema::Table<(ui32)ETierTables::TierBlobsToDelete> { + struct StorageId: Column<1, NScheme::NTypeIds::String> {}; + struct BlobId: Column<2, NScheme::NTypeIds::String> {}; + + using TKey = TableKey<StorageId, BlobId>; + using TColumns = TableColumns<StorageId, BlobId>; + }; + using TTables = SchemaTables< Value, TxInfo, @@ -282,7 +305,9 @@ struct Schema : NIceDb::Schema { IndexCounters, SmallBlobs, OneToOneEvictedBlobs, - Operations + Operations, + TierBlobsDraft, + TierBlobsToDelete >; // diff --git a/ydb/core/tx/columnshard/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/columnshard_ut_common.cpp index 24e09426b74..970738a2b09 100644 --- a/ydb/core/tx/columnshard/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/columnshard_ut_common.cpp @@ -42,7 +42,7 @@ void TTester::Setup(TTestActorRuntime& runtime) { runtime.UpdateCurrentTime(TInstant::Now()); } -void ProvideTieringSnapshot(TTestBasicRuntime& runtime, TActorId& sender, NMetadata::NFetcher::ISnapshot::TPtr snapshot) { +void ProvideTieringSnapshot(TTestBasicRuntime& runtime, const TActorId& sender, NMetadata::NFetcher::ISnapshot::TPtr snapshot) { auto event = std::make_unique<NMetadata::NProvider::TEvRefreshSubscriberData>(snapshot); ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, event.release()); @@ -372,9 +372,7 @@ NMetadata::NFetcher::ISnapshot::TPtr TTestSchema::BuildSnapshot(const TTableSpec { NKikimrSchemeOp::TStorageTierConfig cProto; cProto.SetName(tier.Name); - if (tier.S3) { - *cProto.MutableObjectStorage() = *tier.S3; - } + *cProto.MutableObjectStorage() = tier.S3; if (tier.Codec) { cProto.MutableCompression()->SetCompressionCodec(tier.GetCodecId()); } diff --git a/ydb/core/tx/columnshard/columnshard_ut_common.h b/ydb/core/tx/columnshard/columnshard_ut_common.h index dd52f829b3b..93e2751f09c 100644 --- a/ydb/core/tx/columnshard/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/columnshard_ut_common.h @@ -44,7 +44,7 @@ struct TTestSchema { TString Name; TString Codec; std::optional<int> CompressionLevel; - std::optional<NKikimrSchemeOp::TS3Settings> S3; + NKikimrSchemeOp::TS3Settings S3 = FakeS3(); TStorageTier(const TString& name = {}) : Name(name) @@ -255,15 +255,11 @@ struct TTestSchema { ttlSettings->SetVersion(1); if (specials.HasTiers()) { ttlSettings->SetUseTiering("Tiering1"); - if (specials.HasTtl()) { - InitTtl(specials, ttlSettings->MutableEnabled()); - } - return true; - } else if (specials.HasTtl()) { + } + if (specials.HasTtl()) { InitTtl(specials, ttlSettings->MutableEnabled()); - return true; } - return false; + return specials.HasTiers() || specials.HasTtl(); } static TString CreateTableTxBody(ui64 pathId, const std::vector<std::pair<TString, TTypeInfo>>& columns, @@ -402,7 +398,7 @@ struct TTestSchema { }; bool ProposeSchemaTx(TTestBasicRuntime& runtime, TActorId& sender, const TString& txBody, NOlap::TSnapshot snap); -void ProvideTieringSnapshot(TTestBasicRuntime& runtime, TActorId& sender, NMetadata::NFetcher::ISnapshot::TPtr snapshot); +void ProvideTieringSnapshot(TTestBasicRuntime& runtime, const TActorId& sender, NMetadata::NFetcher::ISnapshot::TPtr snapshot); void PlanSchemaTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot snap); void PlanWriteTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot snap, bool waitResult = true); diff --git a/ydb/core/tx/columnshard/defs.h b/ydb/core/tx/columnshard/defs.h index 35df35546aa..f3d5c90bc18 100644 --- a/ydb/core/tx/columnshard/defs.h +++ b/ydb/core/tx/columnshard/defs.h @@ -5,7 +5,8 @@ #include <ydb/library/yverify_stream/yverify_stream.h> #include <ydb/core/tx/ctor_logger.h> #include <ydb/core/control/immediate_control_board_impl.h> -#include <ydb/core/tx/columnshard/engines/column_engine.h> +#include <ydb/core/tx/columnshard/engines/changes/abstract/settings.h> +#include <ydb/core/tx/columnshard/engines/defs.h> #include <ydb/core/tx/columnshard/engines/writer/put_status.h> namespace NKikimr::NColumnShard { diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.darwin-x86_64.txt index 36141798f17..aa8e7dda1cc 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.darwin-x86_64.txt @@ -20,12 +20,14 @@ target_link_libraries(engines-changes-abstract PUBLIC columnshard-counters-common ydb-core-tablet_flat yql-core-expr_nodes + tx-columnshard-blobs_action tools-enum_parser-enum_serialization_runtime ) target_sources(engines-changes-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/mark.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/compaction_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp ) generate_enum_serilization(engines-changes-abstract ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-aarch64.txt index 530b75e0b99..23b63ed248d 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-aarch64.txt @@ -21,12 +21,14 @@ target_link_libraries(engines-changes-abstract PUBLIC columnshard-counters-common ydb-core-tablet_flat yql-core-expr_nodes + tx-columnshard-blobs_action tools-enum_parser-enum_serialization_runtime ) target_sources(engines-changes-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/mark.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/compaction_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp ) generate_enum_serilization(engines-changes-abstract ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-x86_64.txt index 530b75e0b99..23b63ed248d 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.linux-x86_64.txt @@ -21,12 +21,14 @@ target_link_libraries(engines-changes-abstract PUBLIC columnshard-counters-common ydb-core-tablet_flat yql-core-expr_nodes + tx-columnshard-blobs_action tools-enum_parser-enum_serialization_runtime ) target_sources(engines-changes-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/mark.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/compaction_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp ) generate_enum_serilization(engines-changes-abstract ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.windows-x86_64.txt index 36141798f17..aa8e7dda1cc 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/changes/abstract/CMakeLists.windows-x86_64.txt @@ -20,12 +20,14 @@ target_link_libraries(engines-changes-abstract PUBLIC columnshard-counters-common ydb-core-tablet_flat yql-core-expr_nodes + tx-columnshard-blobs_action tools-enum_parser-enum_serialization_runtime ) target_sources(engines-changes-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/mark.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/compaction_info.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp ) generate_enum_serilization(engines-changes-abstract ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h index c0be64bb75e..4ef0e00ad7e 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h @@ -1,5 +1,7 @@ #pragma once #include "mark.h" +#include "settings.h" +#include <ydb/core/tx/columnshard/blobs_action/abstract/action.h> #include <ydb/core/tx/columnshard/counters/indexation.h> #include <ydb/core/tx/columnshard/engines/columns_table.h> #include <ydb/core/tx/columnshard/engines/portions/portion_info.h> @@ -30,52 +32,16 @@ class TColumnEngineForLogs; class TVersionedIndex; class TPortionInfoWithBlobs; -struct TCompactionLimits { - static constexpr const ui64 MIN_GOOD_BLOB_SIZE = 256 * 1024; // some BlobStorage constant - static constexpr const ui64 MAX_BLOB_SIZE = 8 * 1024 * 1024; // some BlobStorage constant - static constexpr const ui64 EVICT_HOT_PORTION_BYTES = 1 * 1024 * 1024; - static constexpr const ui64 DEFAULT_EVICTION_BYTES = 64 * 1024 * 1024; - static constexpr const ui64 MAX_BLOBS_TO_DELETE = 10000; - - static constexpr const ui64 OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID = 1024 * MAX_BLOB_SIZE; - static constexpr const ui64 WARNING_INSERT_TABLE_SIZE_BY_PATH_ID = 0.3 * OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID; - static constexpr const ui64 WARNING_INSERT_TABLE_COUNT_BY_PATH_ID = 100; - - static constexpr const i64 OVERLOAD_GRANULE_SIZE = 20 * MAX_BLOB_SIZE; - static constexpr const i64 WARNING_OVERLOAD_GRANULE_SIZE = 0.25 * OVERLOAD_GRANULE_SIZE; - - static constexpr const i64 WARNING_INSERTED_PORTIONS_SIZE = 0.5 * WARNING_OVERLOAD_GRANULE_SIZE; - static constexpr const ui32 WARNING_INSERTED_PORTIONS_COUNT = 100; - static constexpr const TDuration CompactionTimeout = TDuration::Minutes(3); - - ui32 GoodBlobSize{MIN_GOOD_BLOB_SIZE}; - ui32 GranuleBlobSplitSize{MAX_BLOB_SIZE}; - - ui32 InGranuleCompactSeconds = 2 * 60; // Trigger in-granule compaction to guarantee no PK intersections - - i64 GranuleOverloadSize = OVERLOAD_GRANULE_SIZE; - i64 GranuleSizeForOverloadPrevent = WARNING_OVERLOAD_GRANULE_SIZE; - i64 GranuleIndexedPortionsSizeLimit = WARNING_INSERTED_PORTIONS_SIZE; - ui32 GranuleIndexedPortionsCountLimit = WARNING_INSERTED_PORTIONS_COUNT; - - TSplitSettings GetSplitSettings() const { - return TSplitSettings() - .SetMinBlobSize(0.5 * std::min<ui64>(MAX_BLOB_SIZE, GranuleSizeForOverloadPrevent)) - .SetMaxBlobSize(std::min<ui64>(MAX_BLOB_SIZE, GranuleSizeForOverloadPrevent)) - .SetMaxPortionSize(0.5 * GranuleSizeForOverloadPrevent); - } -}; - struct TPortionEvictionFeatures { const TString TargetTierName; const ui64 PathId; // portion path id for cold-storage-key construct - bool NeedExport = false; bool DataChanges = true; + const std::shared_ptr<IBlobsStorageOperator> StorageOperator; - TPortionEvictionFeatures(const TString& targetTierName, const ui64 pathId, const bool needExport) + TPortionEvictionFeatures(const TString& targetTierName, const ui64 pathId, const std::shared_ptr<IBlobsStorageOperator>& storageOperator) : TargetTierName(targetTierName) , PathId(pathId) - , NeedExport(needExport) + , StorageOperator(storageOperator) {} }; @@ -200,8 +166,20 @@ protected: } + TBlobsAction BlobsAction; + virtual NColumnShard::ECumulativeCounters GetCounterIndex(const bool isSuccess) const = 0; public: + TBlobsAction& GetBlobsAction() { + return BlobsAction; + } + + TColumnEngineChanges(const std::shared_ptr<IStoragesManager>& storagesManager) + : BlobsAction(storagesManager) + { + + } + TConclusionStatus ConstructBlobs(TConstructionContext& context) noexcept; virtual ~TColumnEngineChanges(); @@ -234,7 +212,11 @@ public: THashMap<TBlobRange, TString> Blobs; - virtual THashSet<TBlobRange> GetReadBlobRanges() const = 0; + std::vector<std::shared_ptr<IBlobsReadingAction>> GetReadingActions() const { + auto result = BlobsAction.GetReadingActions(); + Y_VERIFY(result.size()); + return result; + } virtual TString TypeString() const = 0; TString DebugString() const; diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp b/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp new file mode 100644 index 00000000000..36ba5be4f7e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/abstract/settings.cpp @@ -0,0 +1,5 @@ +#include "settings.h" + +namespace NKikimr::NOlap { + +} diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/settings.h b/ydb/core/tx/columnshard/engines/changes/abstract/settings.h new file mode 100644 index 00000000000..01714771344 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/abstract/settings.h @@ -0,0 +1,44 @@ +#pragma once +#include <ydb/core/tx/columnshard/splitter/settings.h> +#include <util/datetime/base.h> +#include <util/system/types.h> +#include <utility> +namespace NKikimr::NOlap { + +struct TCompactionLimits { + static constexpr const ui64 MIN_GOOD_BLOB_SIZE = 256 * 1024; // some BlobStorage constant + static constexpr const ui64 MAX_BLOB_SIZE = 8 * 1024 * 1024; // some BlobStorage constant + static constexpr const ui64 EVICT_HOT_PORTION_BYTES = 1 * 1024 * 1024; + static constexpr const ui64 DEFAULT_EVICTION_BYTES = 64 * 1024 * 1024; + static constexpr const ui64 MAX_BLOBS_TO_DELETE = 10000; + + static constexpr const ui64 OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID = 1024 * MAX_BLOB_SIZE; + static constexpr const ui64 WARNING_INSERT_TABLE_SIZE_BY_PATH_ID = 0.3 * OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID; + static constexpr const ui64 WARNING_INSERT_TABLE_COUNT_BY_PATH_ID = 100; + + static constexpr const i64 OVERLOAD_GRANULE_SIZE = 20 * MAX_BLOB_SIZE; + static constexpr const i64 WARNING_OVERLOAD_GRANULE_SIZE = 0.25 * OVERLOAD_GRANULE_SIZE; + + static constexpr const i64 WARNING_INSERTED_PORTIONS_SIZE = 0.5 * WARNING_OVERLOAD_GRANULE_SIZE; + static constexpr const ui32 WARNING_INSERTED_PORTIONS_COUNT = 100; + static constexpr const TDuration CompactionTimeout = TDuration::Minutes(3); + + ui32 GoodBlobSize{MIN_GOOD_BLOB_SIZE}; + ui32 GranuleBlobSplitSize{MAX_BLOB_SIZE}; + + ui32 InGranuleCompactSeconds = 2 * 60; // Trigger in-granule compaction to guarantee no PK intersections + + i64 GranuleOverloadSize = OVERLOAD_GRANULE_SIZE; + i64 GranuleSizeForOverloadPrevent = WARNING_OVERLOAD_GRANULE_SIZE; + i64 GranuleIndexedPortionsSizeLimit = WARNING_INSERTED_PORTIONS_SIZE; + ui32 GranuleIndexedPortionsCountLimit = WARNING_INSERTED_PORTIONS_COUNT; + + TSplitSettings GetSplitSettings() const { + return TSplitSettings() + .SetMinBlobSize(0.5 * std::min<ui64>(MAX_BLOB_SIZE, GranuleSizeForOverloadPrevent)) + .SetMaxBlobSize(std::min<ui64>(MAX_BLOB_SIZE, GranuleSizeForOverloadPrevent)) + .SetMaxPortionSize(0.5 * GranuleSizeForOverloadPrevent); + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/ya.make b/ydb/core/tx/columnshard/engines/changes/abstract/ya.make index f5a086121c9..e20654d655e 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/ya.make +++ b/ydb/core/tx/columnshard/engines/changes/abstract/ya.make @@ -4,12 +4,14 @@ SRCS( abstract.cpp mark.cpp compaction_info.cpp + settings.cpp ) PEERDIR( ydb/core/tx/columnshard/counters/common ydb/core/tablet_flat ydb/library/yql/core/expr_nodes + ydb/core/tx/columnshard/blobs_action ) GENERATE_ENUM_SERIALIZATION(abstract.h) diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup.cpp index d129a79f8fc..be4fccccd6e 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup.cpp @@ -9,54 +9,32 @@ void TCleanupColumnEngineChanges::DoDebugString(TStringOutput& out) const { if (ui32 dropped = PortionsToDrop.size()) { out << "drop " << dropped << " portions"; for (auto& portionInfo : PortionsToDrop) { - out << portionInfo; + out << portionInfo->DebugString(); } } } -void TCleanupColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { +void TCleanupColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& /*context*/) { self.IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); - THashSet<TUnifiedBlobId> blobsToDrop; - for (const auto& portionInfo : PortionsToDrop) { - for (const auto& rec : portionInfo.Records) { - const auto& blobId = rec.BlobRange.BlobId; - if (blobsToDrop.emplace(blobId).second) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "Delete blob")("blob_id", blobId); - } + THashSet<TUnifiedBlobId> blobIds; + for (auto&& p : PortionsToDrop) { + auto removing = BlobsAction.GetRemoving(*p); + for (auto&& r : p->Records) { + removing->DeclareRemove(r.BlobRange.BlobId); } - self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, portionInfo.RawBytesSum()); - } - - for (const auto& blobId : blobsToDrop) { - if (self.BlobManager->DropOneToOne(blobId, *context.BlobManagerDb)) { - NColumnShard::TEvictMetadata meta; - auto evict = self.BlobManager->GetDropped(blobId, meta); - Y_VERIFY(evict.State != EEvictState::UNKNOWN); - Y_VERIFY(!meta.GetTierName().empty()); - - BlobsToForget[meta.GetTierName()].emplace(std::move(evict)); - - if (NOlap::IsDeleted(evict.State)) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "SKIP delete blob")("blob_id", blobId); - continue; - } - } - self.BlobManager->DeleteBlob(blobId, *context.BlobManagerDb); - self.IncCounter(NColumnShard::COUNTER_BLOBS_ERASED); - self.IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobId.BlobSize()); + self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p->RawBytesSum()); } } bool TCleanupColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - // Drop old portions - + THashSet<TUnifiedBlobId> blobIds; for (auto& portionInfo : PortionsToDrop) { - if (!self.ErasePortion(portionInfo)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "Cannot erase portion")("portion", portionInfo.DebugString()); + if (!self.ErasePortion(*portionInfo)) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "Cannot erase portion")("portion", portionInfo->DebugString()); return false; } - for (auto& record : portionInfo.Records) { - self.ColumnsTable->Erase(context.DB, portionInfo, record); + for (auto& record : portionInfo->Records) { + self.ColumnsTable->Erase(context.DB, *portionInfo, record); } } @@ -67,8 +45,7 @@ void TCleanupColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { self.BackgroundController.StartCleanup(); } -void TCleanupColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) { - self.ForgetBlobs(context.ActorContext, BlobsToForget); +void TCleanupColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& /*self*/, TWriteIndexCompleteContext& context) { context.TriggerActivity = NeedRepeat ? NColumnShard::TBackgroundActivity::Cleanup() : NColumnShard::TBackgroundActivity::None(); } diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup.h b/ydb/core/tx/columnshard/engines/changes/cleanup.h index 42b1c058066..c245b471782 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup.h +++ b/ydb/core/tx/columnshard/engines/changes/cleanup.h @@ -5,7 +5,9 @@ namespace NKikimr::NOlap { class TCleanupColumnEngineChanges: public TColumnEngineChanges { private: + using TBase = TColumnEngineChanges; THashMap<TString, THashSet<NOlap::TEvictedBlob>> BlobsToForget; + THashMap<TString, std::vector<std::shared_ptr<TPortionInfo>>> StoragePortions; protected: virtual void DoStart(NColumnShard::TColumnShard& self) override; virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) override; @@ -23,21 +25,19 @@ protected: } virtual NColumnShard::ECumulativeCounters GetCounterIndex(const bool isSuccess) const override; public: + using TBase::TBase; + virtual THashSet<TPortionAddress> GetTouchedPortions() const override { THashSet<TPortionAddress> result; for (const auto& portionInfo : PortionsToDrop) { - result.emplace(portionInfo.GetAddress()); + result.emplace(portionInfo->GetAddress()); } return result; } - std::vector<TPortionInfo> PortionsToDrop; + std::vector<std::shared_ptr<TPortionInfo>> PortionsToDrop; bool NeedRepeat = false; - virtual THashSet<TBlobRange> GetReadBlobRanges() const override { - return {}; - } - virtual ui32 GetWritePortionsCount() const override { return 0; } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index 8ad647e2916..8f62034e809 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -18,20 +18,6 @@ void TCompactColumnEngineChanges::DoDebugString(TStringOutput& out) const { } } -THashSet<TBlobRange> TCompactColumnEngineChanges::GetReadBlobRanges() const { - Y_VERIFY(SwitchedPortions.size()); - - THashSet<TBlobRange> result; - for (const auto& portionInfo : SwitchedPortions) { - Y_VERIFY(!portionInfo.Empty()); - - for (const auto& rec : portionInfo.Records) { - Y_VERIFY(result.emplace(rec.BlobRange).second); - } - } - return result; -} - void TCompactColumnEngineChanges::DoCompile(TFinalizationContext& context) { TBase::DoCompile(context); @@ -39,39 +25,10 @@ void TCompactColumnEngineChanges::DoCompile(TFinalizationContext& context) { for (auto& portionInfo : AppendedPortions) { portionInfo.GetPortionInfo().UpdateRecordsMeta(producedClassResultCompaction); } - for (auto& portionInfo : SwitchedPortions) { - Y_VERIFY(portionInfo.IsActive()); - portionInfo.SetRemoveSnapshot(context.GetSnapshot()); - } } bool TCompactColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - Y_VERIFY(TBase::DoApplyChanges(self, context)); - auto g = self.GranulesStorage->StartPackModification(); - for (auto& portionInfo : SwitchedPortions) { - Y_VERIFY(!portionInfo.Empty()); - Y_VERIFY(!portionInfo.IsActive()); - - const ui64 granule = portionInfo.GetGranule(); - const ui64 portion = portionInfo.GetPortion(); - - const TPortionInfo& oldInfo = self.GetGranuleVerified(granule).GetPortionVerified(portion); - - auto& granuleStart = self.Granules[granule]->Record.Mark; - - Y_VERIFY(granuleStart <= portionInfo.IndexKeyStart()); - self.UpsertPortion(portionInfo, &oldInfo); - - for (auto& record : portionInfo.Records) { - self.ColumnsTable->Write(context.DB, portionInfo, record); - } - } - - for (auto& portionInfo : SwitchedPortions) { - self.CleanupPortions.insert(portionInfo.GetAddress()); - } - - return true; + return TBase::DoApplyChanges(self, context); } ui32 TCompactColumnEngineChanges::NumSplitInto(const ui32 srcRows) const { @@ -81,25 +38,22 @@ ui32 TCompactColumnEngineChanges::NumSplitInto(const ui32 srcRows) const { return std::max<ui32>(2, numSplitInto); } -void TCompactColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& /*context*/) { - self.IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, SwitchedPortions.size()); - - THashSet<TUnifiedBlobId> blobsDeactivated; - for (auto& portionInfo : SwitchedPortions) { - for (auto& rec : portionInfo.Records) { - blobsDeactivated.insert(rec.BlobRange.BlobId); - } - self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.RawBytesSum()); - } - - self.IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); - for (auto& blobId : blobsDeactivated) { - self.IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); - } +void TCompactColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { + TBase::DoWriteIndex(self, context); } void TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); + + Y_VERIFY(SwitchedPortions.size()); + for (const auto& p : SwitchedPortions) { + Y_VERIFY(!p.Empty()); + auto action = BlobsAction.GetReading(p); + for (const auto& rec : p.Records) { + action->AddRange(rec.BlobRange); + } + } + self.BackgroundController.StartCompaction(NKikimr::NOlap::TPlanCompactionInfo(GranuleMeta->GetPathId()), *this); NeedGranuleStatusProvide = true; GranuleMeta->OnCompactionStarted(); @@ -121,36 +75,19 @@ void TCompactColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, T NeedGranuleStatusProvide = false; } -TCompactColumnEngineChanges::TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const TCompactionSrcGranule& srcGranule) - : TBase(limits.GetSplitSettings()) +TCompactColumnEngineChanges::TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const std::map<ui64, std::shared_ptr<TPortionInfo>>& portions, const TSaverContext& saverContext) + : TBase(limits.GetSplitSettings(), saverContext) , Limits(limits) , GranuleMeta(granule) - , SrcGranule(srcGranule) { Y_VERIFY(GranuleMeta); - SwitchedPortions.reserve(GranuleMeta->GetPortions().size()); - for (const auto& [_, portionInfo] : GranuleMeta->GetPortions()) { - if (portionInfo->IsActive()) { - SwitchedPortions.push_back(*portionInfo); - Y_VERIFY(portionInfo->GetGranule() == GranuleMeta->GetGranuleId()); - } - } - Y_VERIFY(SwitchedPortions.size()); -} - -TCompactColumnEngineChanges::TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const std::map<ui64, std::shared_ptr<TPortionInfo>>& portions) - : TBase(limits.GetSplitSettings()) - , Limits(limits) - , GranuleMeta(granule) -{ -// Y_VERIFY(GranuleMeta); - SwitchedPortions.reserve(portions.size()); for (const auto& [_, portionInfo] : portions) { Y_VERIFY(portionInfo->IsActive()); - SwitchedPortions.push_back(*portionInfo); - Y_VERIFY(!GranuleMeta || portionInfo->GetGranule() == GranuleMeta->GetGranuleId()); + SwitchedPortions.emplace_back(*portionInfo); + PortionsToRemove.emplace_back(*portionInfo); + Y_VERIFY(portionInfo->GetGranule() == GranuleMeta->GetGranuleId()); } Y_VERIFY(SwitchedPortions.size()); } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.h b/ydb/core/tx/columnshard/engines/changes/compaction.h index b97f9bc8c29..778a557ebc8 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction.h @@ -15,7 +15,6 @@ private: protected: const TCompactionLimits Limits; std::shared_ptr<TGranuleMeta> GranuleMeta; - std::optional<TCompactionSrcGranule> SrcGranule; virtual void DoStart(NColumnShard::TColumnShard& self) override; virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; @@ -29,14 +28,11 @@ protected: NeedGranuleStatusProvide = false; } public: - virtual THashSet<TBlobRange> GetReadBlobRanges() const override; - std::vector<TPortionInfo> SwitchedPortions; // Portions that would be replaced by new ones virtual THashSet<TPortionAddress> GetTouchedPortions() const override; - TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const std::map<ui64, std::shared_ptr<TPortionInfo>>& portions); - TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const TCompactionSrcGranule& srcGranule); + TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const std::map<ui64, std::shared_ptr<TPortionInfo>>& portions, const TSaverContext& saverContext); ~TCompactColumnEngineChanges(); ui32 NumSplitInto(const ui32 srcRows) const; diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 5f9352fe9cc..3fe4f8e5651 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -14,7 +14,6 @@ namespace NKikimr::NOlap::NCompaction { TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { - const ui64 pathId = GranuleMeta->GetPathId(); std::vector<TPortionInfoWithBlobs> portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs); std::optional<TSnapshot> maxSnapshot; for (auto&& i : SwitchedPortions) { @@ -80,13 +79,12 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc } std::map<std::string, std::vector<TColumnPortionResult>> columnChunks; - const auto saverContext = GetSaverContext(pathId); for (auto&& f : resultSchema->GetSchema()->fields()) { const ui32 columnId = resultSchema->GetColumnId(f->name()); auto columnInfo = stats.GetColumnInfo(columnId); Y_VERIFY(columnInfo); - TColumnMergeContext context(resultSchema, portionRecordsCountLimit, 50 * 1024 * 1024, f, *columnInfo, saverContext); + TColumnMergeContext context(resultSchema, portionRecordsCountLimit, 50 * 1024 * 1024, f, *columnInfo, SaverContext); TMergedColumn mColumn(context); auto c = batchResult->GetColumnByName(f->name()); if (c) { @@ -122,7 +120,7 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc } std::vector<TGeneralSerializedSlice> batchSlices; - std::shared_ptr<TDefaultSchemaDetails> schemaDetails(new TDefaultSchemaDetails(resultSchema, saverContext, std::move(stats))); + std::shared_ptr<TDefaultSchemaDetails> schemaDetails(new TDefaultSchemaDetails(resultSchema, SaverContext, std::move(stats))); for (ui32 i = 0; i < columnChunks.begin()->second.size(); ++i) { std::map<ui32, std::vector<IPortionColumnChunk::TPtr>> portionColumns; @@ -140,8 +138,8 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc TGeneralSerializedSlice slice(std::move(i)); std::vector<std::vector<IPortionColumnChunk::TPtr>> chunksByBlobs = slice.GroupChunksByBlobs(); auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); - AppendedPortions.emplace_back(TPortionInfoWithBlobs::BuildByBlobs(chunksByBlobs, nullptr, GranuleMeta->GetGranuleId(), *maxSnapshot)); - AppendedPortions.back().GetPortionInfo().AddMetadata(*resultSchema, b, saverContext.GetTierName()); + AppendedPortions.emplace_back(TPortionInfoWithBlobs::BuildByBlobs(chunksByBlobs, nullptr, GranuleMeta->GetGranuleId(), *maxSnapshot, SaverContext.GetStorageOperator())); + AppendedPortions.back().GetPortionInfo().AddMetadata(*resultSchema, b, SaverContext.GetTierName()); recordIdx += slice.GetRecordsCount(); } if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index b7e366d1b40..c8ed28bfd5f 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -7,16 +7,6 @@ namespace NKikimr::NOlap { -THashSet<TBlobRange> TInsertColumnEngineChanges::GetReadBlobRanges() const { - THashSet<TBlobRange> result; - for (size_t i = 0; i < DataToIndex.size(); ++i) { - const auto& insertedData = DataToIndex[i]; - Y_VERIFY(insertedData.GetBlobRange().IsFullBlob()); - Y_VERIFY(result.emplace(insertedData.GetBlobRange()).second); - } - return result; -} - bool TInsertColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { if (!TBase::DoApplyChanges(self, context)) { return false; @@ -29,7 +19,6 @@ void TInsertColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, for (const auto& insertedData : DataToIndex) { self.InsertTable->EraseCommitted(context.DBWrapper, insertedData); Y_VERIFY(insertedData.GetBlobRange().IsFullBlob()); - self.BlobManager->DeleteBlob(insertedData.GetBlobRange().GetBlobId(), *context.BlobManagerDb); } if (!DataToIndex.empty()) { self.UpdateInsertTableCounters(); @@ -52,6 +41,16 @@ bool TInsertColumnEngineChanges::AddPathIfNotExists(ui64 pathId) { void TInsertColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); + Y_VERIFY(DataToIndex.size()); + auto removing = BlobsAction.GetRemoving(IStoragesManager::DefaultStorageId); + auto reading = BlobsAction.GetReading(IStoragesManager::DefaultStorageId); + for (size_t i = 0; i < DataToIndex.size(); ++i) { + const auto& insertedData = DataToIndex[i]; + Y_VERIFY(insertedData.GetBlobRange().IsFullBlob()); + reading->AddRange(insertedData.GetBlobRange()); + removing->DeclareRemove(insertedData.GetBlobRange().GetBlobId()); + } + self.BackgroundController.StartIndexing(*this); } @@ -122,7 +121,7 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont auto granuleBatches = TMarksGranules::SliceIntoGranules(merged, PathToGranule[pathId], resultSchema->GetIndexInfo()); for (auto& [granule, batch] : granuleBatches) { - auto portions = MakeAppendedPortions(pathId, batch, granule, maxSnapshot, nullptr, context); + auto portions = MakeAppendedPortions(batch, granule, maxSnapshot, nullptr, context); Y_VERIFY(portions.size() > 0); for (auto& portion : portions) { AppendedPortions.emplace_back(std::move(portion)); diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.h b/ydb/core/tx/columnshard/engines/changes/indexation.h index 6b48ef2fd70..4aa50998bfb 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.h +++ b/ydb/core/tx/columnshard/engines/changes/indexation.h @@ -12,7 +12,6 @@ private: std::shared_ptr<arrow::RecordBatch> AddSpecials(const std::shared_ptr<arrow::RecordBatch>& srcBatch, const TIndexInfo& indexInfo, const TInsertedData& inserted) const; std::vector<NOlap::TInsertedData> DataToIndex; - protected: virtual void DoStart(NColumnShard::TColumnShard& self) override; virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; @@ -25,11 +24,11 @@ public: const TMark DefaultMark; THashMap<ui64, std::vector<std::pair<TMark, ui64>>> PathToGranule; // pathId -> {mark, granule} public: - TInsertColumnEngineChanges(const TMark& defaultMark, std::vector<NOlap::TInsertedData>&& dataToIndex, const TSplitSettings& splitSettings) - : TBase(splitSettings) + TInsertColumnEngineChanges(const TMark& defaultMark, std::vector<NOlap::TInsertedData>&& dataToIndex, const TSplitSettings& splitSettings, const TSaverContext& saverContext) + : TBase(splitSettings, saverContext) , DataToIndex(std::move(dataToIndex)) - , DefaultMark(defaultMark) { - + , DefaultMark(defaultMark) + { } const std::vector<NOlap::TInsertedData>& GetDataToIndex() const { @@ -40,7 +39,6 @@ public: return TBase::GetTouchedPortions(); } - virtual THashSet<TBlobRange> GetReadBlobRanges() const override; virtual TString TypeString() const override { return "INSERT"; } diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.cpp b/ydb/core/tx/columnshard/engines/changes/ttl.cpp index ed8ff71be6d..33dd6d78245 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.cpp +++ b/ydb/core/tx/columnshard/engines/changes/ttl.cpp @@ -12,136 +12,23 @@ void TTTLColumnEngineChanges::DoDebugString(TStringOutput& out) const { if (PortionsToEvict.size()) { out << "eviction=(count=" << PortionsToEvict.size() << ";portions=["; for (auto& info : PortionsToEvict) { - out << info.GetActualPortionInfo() << ";to=" << info.GetFeatures().TargetTierName << ";"; + out << info.GetPortionInfo() << ";to=" << info.GetFeatures().TargetTierName << ";"; } out << "];"; } } -THashSet<TBlobRange> TTTLColumnEngineChanges::GetReadBlobRanges() const { - Y_VERIFY(PortionsToEvict.size()); - - THashSet<TBlobRange> result; +void TTTLColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { + Y_VERIFY(PortionsToEvict.size() || PortionsToRemove.size()); for (const auto& p : PortionsToEvict) { Y_VERIFY(!p.GetPortionInfo().Empty()); + PortionsToRemove.emplace_back(p.GetPortionInfo()); + auto agent = BlobsAction.GetReading(p.GetPortionInfo()); for (const auto& rec : p.GetPortionInfo().Records) { - Y_VERIFY(result.emplace(rec.BlobRange).second); - } - } - return result; -} - -bool TTTLColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - if (!TBase::DoApplyChanges(self, context)) { - return false; - } - - for (auto& info : PortionsToEvict) { - auto& portionInfo = info.GetActualPortionInfo(); - const ui64 granule = portionInfo.GetGranule(); - const ui64 portion = portionInfo.GetPortion(); - if (!self.IsPortionExists(granule, portion)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "Cannot evict unknown portion")("portion", portionInfo.DebugString()); - return false; - } - - const TPortionInfo& oldInfo = self.GetGranuleVerified(granule).GetPortionVerified(portion); - Y_VERIFY(oldInfo.IsActive()); - Y_VERIFY(portionInfo.GetMeta().GetTierName() != oldInfo.GetMeta().GetTierName()); - - self.UpsertPortion(portionInfo, &oldInfo); - - for (auto& record : portionInfo.Records) { - self.ColumnsTable->Write(context.DB, portionInfo, record); - } - } - - return true; -} - -void TTTLColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { - TBase::DoWriteIndex(self, context); - THashMap<TUnifiedBlobId, NOlap::TPortionEvictionFeatures> blobsToExport; - THashSet<TUnifiedBlobId> protectedBlobs; - - self.IncCounter(NColumnShard::COUNTER_EVICTION_PORTIONS_WRITTEN, PortionsToEvict.size()); - for (const auto& info : PortionsToEvict) { - const auto& portionInfo = info.GetPortionWithBlobs().GetPortionInfo(); - const auto& evictionFeatures = info.GetFeatures(); - // Mark exported blobs - if (evictionFeatures.NeedExport) { - auto& tierName = portionInfo.GetMeta().GetTierName(); - Y_VERIFY(!tierName.empty()); - - for (auto& rec : portionInfo.Records) { - auto& blobId = rec.BlobRange.BlobId; - if (!blobsToExport.contains(blobId)) { - NKikimrTxColumnShard::TEvictMetadata meta; - meta.SetTierName(tierName); - - NOlap::TEvictedBlob evict{ - .State = EEvictState::EVICTING, - .Blob = blobId, - .ExternBlob = blobId.MakeS3BlobId(evictionFeatures.PathId) - }; - if (self.BlobManager->ExportOneToOne(std::move(evict), meta, *context.BlobManagerDb)) { - blobsToExport.emplace(blobId, evictionFeatures); - } else { - // TODO: support S3 -> S3 eviction - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "Prevent evict evicted blob")("blob_id", blobId); - protectedBlobs.insert(blobId); - } - } - } + agent->AddRange(rec.BlobRange); } } - // Note: RAW_BYTES_ERASED and BYTES_ERASED counters are not in sync for evicted data - THashSet<TUnifiedBlobId> blobsToDrop; - for (const auto& rec : EvictedRecords) { - const auto& blobId = rec.BlobRange.BlobId; - if (blobsToExport.contains(blobId)) { - // Eviction to S3. TTxExportFinish will delete src blob when dst blob get EEvictState::EXTERN state. - } else if (!protectedBlobs.contains(blobId)) { - // We could drop the blob immediately - if (blobsToDrop.emplace(blobId).second) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "Delete evicted blob")("blob_id", blobId); - } - - } - } - - if (blobsToExport.size()) { - for (auto& [blobId, evFeatures] : blobsToExport) { - ExportTierBlobs[evFeatures.TargetTierName][evFeatures.PathId].emplace(blobId); - } - blobsToExport.clear(); - - ui32 numExports = 0; - for (auto& [tierName, pathBlobs] : ExportTierBlobs) { - numExports += pathBlobs.size(); - } - - ExportNo = self.LastExportNo; - self.LastExportNo += numExports; - - // Do not start new TTL till we finish current tx. TODO: check if this protection needed - Y_VERIFY(!self.ActiveEvictions, "Unexpected active evictions count at tablet %lu", self.TabletID()); - self.ActiveEvictions += numExports; - - NIceDb::TNiceDb db(context.Txc.DB); - NColumnShard::Schema::SaveSpecialValue(db, NColumnShard::Schema::EValueIds::LastExportNumber, self.LastExportNo); - } -} - -void TTTLColumnEngineChanges::DoCompile(TFinalizationContext& context) { - TBase::DoCompile(context); - for (auto& info : PortionsToEvict) { - info.GetPortionWithBlobs().GetPortionInfo().UpdateRecordsMeta(TPortionMeta::EProduced::EVICTED); - } -} - -void TTTLColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { self.BackgroundController.StartTtl(*this); } @@ -149,26 +36,7 @@ void TTTLColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChan self.BackgroundController.FinishTtl(); } -void TTTLColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) { - TBase::DoWriteIndexComplete(self, context); - for (auto& [tierName, pathBlobs] : ExportTierBlobs) { - for (auto& [pathId, blobs] : pathBlobs) { - ++ExportNo; - Y_VERIFY(pathId); - auto event = std::make_unique<NColumnShard::TEvPrivate::TEvExport>(ExportNo, tierName, pathId, std::move(blobs)); - self.ExportBlobs(context.ActorContext, std::move(event)); - } - self.ActiveEvictions -= pathBlobs.size(); - } - if (ExportTierBlobs.size()) { - Y_VERIFY(!self.ActiveEvictions, "Unexpected active evictions count at tablet %lu", self.TabletID()); - } - - self.IncCounter(NColumnShard::COUNTER_EVICTION_BLOBS_WRITTEN, context.BlobsWritten); - self.IncCounter(NColumnShard::COUNTER_EVICTION_BYTES_WRITTEN, context.BytesWritten); -} - -bool TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, const THashMap<TBlobRange, TString>& srcBlobs, std::vector<TColumnRecord>& evictedRecords, +std::optional<TPortionInfoWithBlobs> TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, const THashMap<TBlobRange, TString>& srcBlobs, TConstructionContext& context) const { const TPortionInfo& portionInfo = info.GetPortionInfo(); auto& evictFeatures = info.GetFeatures(); @@ -178,58 +46,39 @@ bool TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, co Y_VERIFY(tiering); auto compression = tiering->GetCompression(evictFeatures.TargetTierName); if (!compression) { - // Noting to recompress. We have no other kinds of evictions yet. + // Nothing to recompress. We have no other kinds of evictions yet. evictFeatures.DataChanges = false; - info.SetPortionWithBlobs(TPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs)); - info.GetPortionWithBlobs().GetPortionInfo().MutableMeta().SetTierName(evictFeatures.TargetTierName); - return true; + auto result = TPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs); + result.GetPortionInfo().InitOperator(evictFeatures.StorageOperator, true); + result.GetPortionInfo().MutableMeta().SetTierName(evictFeatures.TargetTierName); + return result; } - Y_VERIFY(!evictFeatures.NeedExport); - - TPortionInfo undo = portionInfo; - - auto blobSchema = context.SchemaVersions.GetSchema(undo.GetMinSnapshot()); + auto blobSchema = context.SchemaVersions.GetSchema(portionInfo.GetMinSnapshot()); auto resultSchema = context.SchemaVersions.GetLastSchema(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("portion_for_eviction", portionInfo.DebugString()); auto batch = portionInfo.AssembleInBatch(*blobSchema, *resultSchema, srcBlobs); - TSaverContext saverContext; + TSaverContext saverContext(evictFeatures.StorageOperator, SaverContext.GetStoragesManager()); saverContext.SetTierName(evictFeatures.TargetTierName).SetExternalCompression(compression); auto withBlobs = TPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs); + withBlobs.GetPortionInfo().InitOperator(evictFeatures.StorageOperator, true); withBlobs.GetPortionInfo().MutableMeta().SetTierName(evictFeatures.TargetTierName); - std::optional<TPortionInfoWithBlobs> actualPortion = withBlobs.ChangeSaver(resultSchema, saverContext); - if (!actualPortion) { - return false; - } - info.SetPortionWithBlobs(std::move(*actualPortion)); - - for (auto& rec : undo.Records) { - evictedRecords.emplace_back(std::move(rec)); - } - - return true; + return withBlobs.ChangeSaver(resultSchema, saverContext); } NKikimr::TConclusionStatus TTTLColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { - Y_VERIFY(!Blobs.empty()); // src data - Y_VERIFY(!PortionsToEvict.empty()); // src meta - Y_VERIFY(EvictedRecords.empty()); // dst meta - - auto baseResult = TBase::DoConstructBlobs(context); - Y_VERIFY(baseResult.Ok()); - - std::vector<TPortionForEviction> evicted; - evicted.reserve(PortionsToEvict.size()); + Y_VERIFY(!Blobs.empty()); + Y_VERIFY(!PortionsToEvict.empty()); for (auto&& info : PortionsToEvict) { - if (UpdateEvictedPortion(info, Blobs, EvictedRecords, context)) { - Y_VERIFY(info.GetPortionWithBlobs().GetPortionInfo().GetMeta().GetTierName() == info.GetFeatures().TargetTierName); - evicted.emplace_back(std::move(info)); + if (auto pwb = UpdateEvictedPortion(info, Blobs, context)) { + PortionsToRemove.emplace_back(info.GetPortionInfo()); + AppendedPortions.emplace_back(std::move(*pwb)); } } - PortionsToEvict.swap(evicted); + PortionsToEvict.clear(); return TConclusionStatus::Success(); } diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.h b/ydb/core/tx/columnshard/engines/changes/ttl.h index c03a0b09583..8d71dc736ea 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.h +++ b/ydb/core/tx/columnshard/engines/changes/ttl.h @@ -1,21 +1,19 @@ #pragma once -#include "cleanup.h" +#include "compaction.h" #include <ydb/core/tx/columnshard/engines/scheme/tier_info.h> namespace NKikimr::NOlap { -class TTTLColumnEngineChanges: public TCleanupColumnEngineChanges { +class TTTLColumnEngineChanges: public TChangesWithAppend { private: using TPathIdBlobs = THashMap<ui64, THashSet<TUnifiedBlobId>>; - using TBase = TCleanupColumnEngineChanges; + using TBase = TChangesWithAppend; THashMap<TString, TPathIdBlobs> ExportTierBlobs; - ui64 ExportNo = 0; class TPortionForEviction { private: TPortionInfo PortionInfo; TPortionEvictionFeatures Features; - std::optional<TPortionInfoWithBlobs> PortionWithBlobs; public: TPortionForEviction(const TPortionInfo& portion, TPortionEvictionFeatures&& features) : PortionInfo(portion) @@ -33,43 +31,19 @@ private: } const TPortionInfo& GetPortionInfo() const { - Y_VERIFY(!PortionWithBlobs); return PortionInfo; } - - void SetPortionWithBlobs(TPortionInfoWithBlobs&& data) { - Y_VERIFY(!PortionWithBlobs); - PortionWithBlobs = std::move(data); - } - - TPortionInfoWithBlobs& GetPortionWithBlobs() { - Y_VERIFY(PortionWithBlobs); - return *PortionWithBlobs; - } - - const TPortionInfoWithBlobs& GetPortionWithBlobs() const { - Y_VERIFY(PortionWithBlobs); - return *PortionWithBlobs; - } - - const TPortionInfo& GetActualPortionInfo() const { - return PortionWithBlobs ? PortionWithBlobs->GetPortionInfo() : PortionInfo; - } }; - bool UpdateEvictedPortion(TPortionForEviction& info, const THashMap<TBlobRange, TString>& srcBlobs, - std::vector<TColumnRecord>& evictedRecords, TConstructionContext& context) const; + std::optional<TPortionInfoWithBlobs> UpdateEvictedPortion(TPortionForEviction& info, const THashMap<TBlobRange, TString>& srcBlobs, + TConstructionContext& context) const; std::vector<TPortionForEviction> PortionsToEvict; // {portion, TPortionEvictionFeatures} protected: - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; - virtual void DoCompile(TFinalizationContext& context) override; virtual void DoStart(NColumnShard::TColumnShard& self) override; virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) override; - virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) override; virtual void DoDebugString(TStringOutput& out) const override; - virtual void DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) override; virtual TConclusionStatus DoConstructBlobs(TConstructionContext& context) noexcept override; virtual NColumnShard::ECumulativeCounters GetCounterIndex(const bool isSuccess) const override; public: @@ -84,35 +58,23 @@ public: return result; } - std::vector<TColumnRecord> EvictedRecords; THashMap<ui64, NOlap::TTiering> Tiering; - virtual THashSet<TBlobRange> GetReadBlobRanges() const override; - - void AddPortionToEvict(const TPortionInfo& info, TPortionEvictionFeatures&& features) { - Y_VERIFY(!info.Empty()); - Y_VERIFY(info.IsActive()); - PortionsToEvict.emplace_back(info, std::move(features)); - } ui32 GetPortionsToEvictCount() const { return PortionsToEvict.size(); } - virtual ui32 GetWritePortionsCount() const override { - return PortionsToEvict.size(); - } - virtual TPortionInfoWithBlobs* GetWritePortionInfo(const ui32 index) override { - Y_VERIFY(index < PortionsToEvict.size()); - return &PortionsToEvict[index].GetPortionWithBlobs(); - } - virtual bool NeedWritePortion(const ui32 index) const override { - Y_VERIFY(index < PortionsToEvict.size()); - return PortionsToEvict[index].GetFeatures().DataChanges; + void AddPortionToEvict(const TPortionInfo& info, TPortionEvictionFeatures&& features) { + Y_VERIFY(!info.Empty()); + Y_VERIFY(info.IsActive()); + PortionsToEvict.emplace_back(info, std::move(features)); } virtual TString TypeString() const override { return "TTL"; } + + using TBase::TBase; }; } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index a16552bc53b..a354cf5ceab 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -37,7 +37,20 @@ void TChangesWithAppend::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIn Y_FAIL("Unexpected inactive case"); break; } + } + self.IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); + + THashSet<TUnifiedBlobId> blobsDeactivated; + for (auto& portionInfo : PortionsToRemove) { + for (auto& rec : portionInfo.Records) { + blobsDeactivated.insert(rec.BlobRange.BlobId); + } + self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.RawBytesSum()); + } + self.IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); + for (auto& blobId : blobsDeactivated) { + self.IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); } } @@ -62,6 +75,30 @@ bool TChangesWithAppend::DoApplyChanges(TColumnEngineForLogs& self, TApplyChange } } + auto g = self.GranulesStorage->StartPackModification(); + for (auto& portionInfo : PortionsToRemove) { + Y_VERIFY(!portionInfo.Empty()); + Y_VERIFY(!portionInfo.IsActive()); + + const ui64 granule = portionInfo.GetGranule(); + const ui64 portion = portionInfo.GetPortion(); + + const TPortionInfo& oldInfo = self.GetGranuleVerified(granule).GetPortionVerified(portion); + + auto& granuleStart = self.Granules[granule]->Record.Mark; + + Y_VERIFY(granuleStart <= portionInfo.IndexKeyStart()); + self.UpsertPortion(portionInfo, &oldInfo); + + for (auto& record : portionInfo.Records) { + self.ColumnsTable->Write(context.DB, portionInfo, record); + } + } + + for (auto& portionInfo : PortionsToRemove) { + self.CleanupPortions.insert(portionInfo.GetAddress()); + } + return true; } @@ -70,46 +107,31 @@ void TChangesWithAppend::DoCompile(TFinalizationContext& context) { i.GetPortionInfo().SetPortion(context.NextPortionId()); i.GetPortionInfo().UpdateRecordsMeta(TPortionMeta::EProduced::INSERTED); } -} - -TSaverContext TChangesWithAppend::GetSaverContext(const ui32 pathId) const { - TString tierName; - std::optional<NArrow::TCompression> compression; - if (pathId) { - if (auto* tiering = TieringInfo.FindPtr(pathId)) { - tierName = tiering->GetHottestTierName(); - if (const auto& tierCompression = tiering->GetCompression(tierName)) { - compression = *tierCompression; - } - } + for (auto& portionInfo : PortionsToRemove) { + Y_VERIFY(portionInfo.IsActive()); + portionInfo.SetRemoveSnapshot(context.GetSnapshot()); } - TSaverContext saverContext; - saverContext.SetTierName(tierName).SetExternalCompression(compression); - return saverContext; } -std::vector<TPortionInfoWithBlobs> TChangesWithAppend::MakeAppendedPortions( - const ui64 pathId, const std::shared_ptr<arrow::RecordBatch> batch, const ui64 granule, const TSnapshot& snapshot, - const TGranuleMeta* granuleMeta, TConstructionContext& context) const { +std::vector<TPortionInfoWithBlobs> TChangesWithAppend::MakeAppendedPortions(const std::shared_ptr<arrow::RecordBatch> batch, + const ui64 granule, const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context) const { Y_VERIFY(batch->num_rows()); auto resultSchema = context.SchemaVersions.GetSchema(snapshot); std::vector<TPortionInfoWithBlobs> out; - const TSaverContext saverContext = GetSaverContext(pathId); - NOlap::TSerializationStats stats; if (granuleMeta) { stats = granuleMeta->BuildSerializationStats(resultSchema); } - auto schema = std::make_shared<TDefaultSchemaDetails>(resultSchema, saverContext, std::move(stats)); + auto schema = std::make_shared<TDefaultSchemaDetails>(resultSchema, SaverContext, std::move(stats)); TRBSplitLimiter limiter(context.Counters.SplitterCounters, schema, batch, SplitSettings); std::vector<std::vector<IPortionColumnChunk::TPtr>> chunkByBlobs; std::shared_ptr<arrow::RecordBatch> portionBatch; while (limiter.Next(chunkByBlobs, portionBatch)) { - TPortionInfoWithBlobs infoWithBlob = TPortionInfoWithBlobs::BuildByBlobs(chunkByBlobs, nullptr, granule, snapshot); - infoWithBlob.GetPortionInfo().AddMetadata(*resultSchema, portionBatch, saverContext.GetTierName()); + TPortionInfoWithBlobs infoWithBlob = TPortionInfoWithBlobs::BuildByBlobs(chunkByBlobs, nullptr, granule, snapshot, SaverContext.GetStorageOperator()); + infoWithBlob.GetPortionInfo().AddMetadata(*resultSchema, portionBatch, SaverContext.GetTierName()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("portion_appended", infoWithBlob.GetPortionInfo().DebugString()); out.emplace_back(std::move(infoWithBlob)); } @@ -117,10 +139,7 @@ std::vector<TPortionInfoWithBlobs> TChangesWithAppend::MakeAppendedPortions( return out; } -void TChangesWithAppend::DoStart(NColumnShard::TColumnShard& self) { - if (self.Tiers) { - TieringInfo = self.Tiers->GetTiering(); - } +void TChangesWithAppend::DoStart(NColumnShard::TColumnShard& /*self*/) { } } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.h b/ydb/core/tx/columnshard/engines/changes/with_appended.h index 6e41042925f..0db6dae7859 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.h +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.h @@ -8,9 +8,10 @@ namespace NKikimr::NOlap { class TChangesWithAppend: public TColumnEngineChanges { private: - THashMap<ui64, NOlap::TTiering> TieringInfo; + using TBase = TColumnEngineChanges; TSplitSettings SplitSettings; protected: + TSaverContext SaverContext; virtual void DoDebugString(TStringOutput& out) const override; virtual void DoCompile(TFinalizationContext& context) override; virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) override; @@ -18,11 +19,8 @@ protected: virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& /*self*/, TWriteIndexCompleteContext& /*context*/) override { } - TSaverContext GetSaverContext(const ui32 pathId) const; virtual void DoStart(NColumnShard::TColumnShard& self) override; - std::vector<TPortionInfoWithBlobs> MakeAppendedPortions(const ui64 pathId, - const std::shared_ptr<arrow::RecordBatch> batch, - const ui64 granule, + std::vector<TPortionInfoWithBlobs> MakeAppendedPortions(const std::shared_ptr<arrow::RecordBatch> batch, const ui64 granule, const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context) const; public: @@ -30,18 +28,25 @@ public: return SplitSettings; } - TChangesWithAppend(const TSplitSettings& splitSettings) - : SplitSettings(splitSettings) + TChangesWithAppend(const TSplitSettings& splitSettings, const TSaverContext& saverContext) + : TBase(saverContext.GetStoragesManager()) + , SplitSettings(splitSettings) + , SaverContext(saverContext) { } virtual THashSet<TPortionAddress> GetTouchedPortions() const override { - return {}; + THashSet<TPortionAddress> result; + for (auto&& i : PortionsToRemove) { + result.emplace(i.GetAddress()); + } + return result; } - std::vector<TPortionInfoWithBlobs> AppendedPortions; // New portions after indexing or compaction - THashMap<ui64, std::pair<ui64, TMark>> NewGranules; // granule -> {pathId, key} + std::vector<TPortionInfo> PortionsToRemove; + std::vector<TPortionInfoWithBlobs> AppendedPortions; + THashMap<ui64, std::pair<ui64, TMark>> NewGranules; ui64 FirstGranuleId = 0; virtual ui32 GetWritePortionsCount() const override { return AppendedPortions.size(); diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index e22e5e2cf5f..efe84977f65 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -1,4 +1,5 @@ #include "column_engine.h" +#include "changes/abstract/abstract.h" #include <util/stream/output.h> namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 436c4679aba..6b55eb75b0a 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -1,9 +1,9 @@ #pragma once -#include "changes/abstract/abstract.h" #include "granules_table.h" #include "portions/portion_info.h" #include "scheme/snapshot_scheme.h" #include "predicate/filter.h" +#include "changes/abstract/settings.h" #include "changes/abstract/compaction_info.h" #include <ydb/core/tx/columnshard/common/reverse_accessor.h> @@ -15,8 +15,10 @@ class TTtl; namespace NKikimr::NOlap { class TInsertColumnEngineChanges; class TCompactColumnEngineChanges; +class TColumnEngineChanges; class TTTLColumnEngineChanges; class TCleanupColumnEngineChanges; + struct TSelectInfo { struct TStats { size_t Granules{}; diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 912be31e36d..14053fd85fb 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -17,8 +17,9 @@ namespace NKikimr::NOlap { -TColumnEngineForLogs::TColumnEngineForLogs(ui64 tabletId, const TCompactionLimits& limits) - : GranulesStorage(std::make_shared<TGranulesStorage>(SignalCounters, limits)) +TColumnEngineForLogs::TColumnEngineForLogs(ui64 tabletId, const TCompactionLimits& limits, const std::shared_ptr<IStoragesManager>& storagesManager) + : GranulesStorage(std::make_shared<TGranulesStorage>(SignalCounters, limits, storagesManager)) + , StoragesManager(storagesManager) , TabletId(tabletId) , LastPortion(0) , LastGranule(0) @@ -247,7 +248,9 @@ bool TColumnEngineForLogs::LoadCounters(IDbWrapper& db) { std::shared_ptr<TInsertColumnEngineChanges> TColumnEngineForLogs::StartInsert(std::vector<TInsertedData>&& dataToIndex) noexcept { Y_VERIFY(dataToIndex.size()); - auto changes = std::make_shared<TInsertColumnEngineChanges>(DefaultMark(), std::move(dataToIndex), TSplitSettings()); + TSaverContext saverContext(StoragesManager->GetInsertOperator(), StoragesManager); + + auto changes = std::make_shared<TInsertColumnEngineChanges>(DefaultMark(), std::move(dataToIndex), TSplitSettings(), saverContext); ui32 reserveGranules = 0; for (const auto& data : changes->GetDataToIndex()) { const ui64 pathId = data.PathId; @@ -288,7 +291,7 @@ std::shared_ptr<TColumnEngineChanges> TColumnEngineForLogs::StartCompaction(cons std::shared_ptr<TCleanupColumnEngineChanges> TColumnEngineForLogs::StartCleanup(const TSnapshot& snapshot, THashSet<ui64>& pathsToDrop, ui32 maxRecords) noexcept { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanup")("portions_count", CleanupPortions.size()); - auto changes = std::make_shared<TCleanupColumnEngineChanges>(); + auto changes = std::make_shared<TCleanupColumnEngineChanges>(StoragesManager); ui32 affectedRecords = 0; // Add all portions from dropped paths @@ -306,7 +309,7 @@ std::shared_ptr<TCleanupColumnEngineChanges> TColumnEngineForLogs::StartCleanup( Y_VERIFY(spg); for (auto& [portion, info] : spg->GetPortions()) { affectedRecords += info->NumChunks(); - changes->PortionsToDrop.push_back(*info); + changes->PortionsToDrop.push_back(info); dropPortions.insert(portion); } @@ -342,12 +345,12 @@ std::shared_ptr<TCleanupColumnEngineChanges> TColumnEngineForLogs::StartCleanup( granuleMeta = itGranule->second; } Y_VERIFY(granuleMeta); - auto* portionInfo = granuleMeta->GetPortionPointer(it->GetPortionId()); + auto portionInfo = granuleMeta->GetPortionPtr(it->GetPortionId()); if (!portionInfo) { it = CleanupPortions.erase(it); } else if (portionInfo->CheckForCleanup(snapshot)) { affectedRecords += portionInfo->NumChunks(); - changes->PortionsToDrop.push_back(*portionInfo); + changes->PortionsToDrop.push_back(portionInfo); it = CleanupPortions.erase(it); if (affectedRecords > maxRecords) { changes->NeedRepeat = true; @@ -361,6 +364,10 @@ std::shared_ptr<TCleanupColumnEngineChanges> TColumnEngineForLogs::StartCleanup( } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanup")("portions_count", CleanupPortions.size())("portions_prepared", changes->PortionsToDrop.size()); + if (changes->PortionsToDrop.empty()) { + return nullptr; + } + return changes; } @@ -371,15 +378,16 @@ TDuration TColumnEngineForLogs::ProcessTiering(const ui64 pathId, const TTiering auto& indexInfo = VersionedIndex.GetLastSchema()->GetIndexInfo(); Y_VERIFY(context.Changes->Tiering.emplace(pathId, ttl).second); - TDuration dWaiting = TDuration::Minutes(5); + TDuration dWaiting = NYDBTest::TControllers::GetColumnShardController()->GetTTLDefaultWaitingDuration(TDuration::Minutes(5)); auto itGranules = PathGranules.find(pathId); if (itGranules == PathGranules.end()) { return dWaiting; } - auto expireTimestampOpt = ttl.GetEvictInstant(context.Now); - Y_VERIFY(expireTimestampOpt); - auto expireTimestamp = *expireTimestampOpt; + std::optional<TInstant> expireTimestampOpt; + if (ttl.Ttl) { + expireTimestampOpt = ttl.Ttl->GetEvictInstant(context.Now); + } auto ttlColumnNames = ttl.GetTtlColumns(); Y_VERIFY(ttlColumnNames.size() == 1); // TODO: support different ttl columns @@ -400,17 +408,17 @@ TDuration TColumnEngineForLogs::ProcessTiering(const ui64 pathId, const TTiering context.AllowEviction = (evictionSize <= context.MaxEvictBytes); context.AllowDrop = (dropBlobs <= TCompactionLimits::MAX_BLOBS_TO_DELETE); - const bool tryEvictPortion = context.AllowEviction && ttl.HasTiers() && info->EvictReady(TCompactionLimits::EVICT_HOT_PORTION_BYTES); + const bool tryEvictPortion = context.AllowEviction && ttl.HasTiers(); if (auto max = info->MaxValue(ttlColumnId)) { - bool keep = false; - { - auto mpiOpt = ttl.ScalarToInstant(max); + bool keep = !expireTimestampOpt; + if (expireTimestampOpt) { + auto mpiOpt = ttl.Ttl->ScalarToInstant(max); Y_VERIFY(mpiOpt); const TInstant maxTtlPortionInstant = *mpiOpt; - const TDuration d = maxTtlPortionInstant - expireTimestamp; + const TDuration d = maxTtlPortionInstant - *expireTimestampOpt; keep = !!d; - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "keep_detect")("max", maxTtlPortionInstant.Seconds())("expire", expireTimestamp.Seconds()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "keep_detect")("max", maxTtlPortionInstant.Seconds())("expire", expireTimestampOpt->Seconds()); if (d && dWaiting > d) { dWaiting = d; } @@ -418,7 +426,7 @@ TDuration TColumnEngineForLogs::ProcessTiering(const ui64 pathId, const TTiering AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_result")("keep", keep)("tryEvictPortion", tryEvictPortion)("allowDrop", context.AllowDrop); if (keep && tryEvictPortion) { - TString tierName; + TString tierName = ""; for (auto& tierRef : ttl.GetOrderedTiers()) { auto& tierInfo = tierRef.Get(); if (!indexInfo.AllowTtlOverColumn(tierInfo.GetEvictColumnName())) { @@ -429,29 +437,34 @@ TDuration TColumnEngineForLogs::ProcessTiering(const ui64 pathId, const TTiering Y_VERIFY(mpiOpt); const TInstant maxTieringPortionInstant = *mpiOpt; - const TDuration d = maxTieringPortionInstant - tierInfo.GetEvictInstant(context.Now); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering")("max", maxTieringPortionInstant.Seconds()) - ("evict", tierInfo.GetEvictInstant(context.Now).Seconds()); + const TDuration d = tierInfo.GetEvictInstant(context.Now) - maxTieringPortionInstant; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_choosing")("max", maxTieringPortionInstant.Seconds()) + ("evict", tierInfo.GetEvictInstant(context.Now).Seconds())("tier_name", tierInfo.GetName())("d", d); if (d) { - if (dWaiting > d) { - dWaiting = d; - } tierName = tierInfo.GetName(); - } else { break; + } else { + auto dWaitLocal = maxTieringPortionInstant - tierInfo.GetEvictInstant(context.Now); + if (dWaiting > dWaitLocal) { + dWaiting = dWaitLocal; + } } } - if (info->GetMeta().GetTierName() != tierName) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering switch detected")("from", info->GetMeta().GetTierName())("to", tierName); + if (!tierName) { + tierName = IStoragesManager::DefaultStorageId; + } + const TString currentTierName = info->GetMeta().GetTierName() ? info->GetMeta().GetTierName() : IStoragesManager::DefaultStorageId; + if (currentTierName != tierName) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering switch detected")("from", currentTierName)("to", tierName); evictionSize += info->BlobsSizes().first; - const bool needExport = ttl.NeedExport(tierName); - context.Changes->AddPortionToEvict(*info, TPortionEvictionFeatures(tierName, pathId, needExport)); + context.Changes->AddPortionToEvict(*info, TPortionEvictionFeatures(tierName, pathId, StoragesManager->GetOperator(tierName))); SignalCounters.OnPortionToEvict(info->BlobsBytes()); } } if (!keep && context.AllowDrop) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "portion_remove")("portion", info->DebugString()); dropBlobs += info->NumBlobs(); - context.Changes->PortionsToDrop.push_back(*info); + context.Changes->PortionsToRemove.emplace_back(*info); SignalCounters.OnPortionToDrop(info->BlobsBytes()); } } else { @@ -490,7 +503,7 @@ bool TColumnEngineForLogs::DrainEvictionQueue(std::map<TMonotonic, std::vector<T } else { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stop scan")("reason", "task_ready")("first", evictionsQueue.begin()->first)("now", nowMonotonic) ("internal", hasChanges)("evict_portions", context.Changes->GetPortionsToEvictCount()) - ("drop_portions", context.Changes->PortionsToDrop.size()); + ("drop_portions", context.Changes->PortionsToRemove.size()); } } else { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stop scan")("reason", "no data in queue"); @@ -502,7 +515,10 @@ std::shared_ptr<TTTLColumnEngineChanges> TColumnEngineForLogs::StartTtl(const TH AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartTtl")("external", pathEviction.size()) ("internal", EvictionsController.MutableNextCheckInstantForTierings().size()) ; - auto changes = std::make_shared<TTTLColumnEngineChanges>(); + + TSaverContext saverContext(StoragesManager->GetDefaultOperator(), StoragesManager); + + auto changes = std::make_shared<TTTLColumnEngineChanges>(TSplitSettings(), saverContext); TTieringProcessContext context(maxEvictBytes, changes, busyPortions); bool hasExternalChanges = false; @@ -516,13 +532,9 @@ std::shared_ptr<TTTLColumnEngineChanges> TColumnEngineForLogs::StartTtl(const TH DrainEvictionQueue(EvictionsController.MutableNextCheckInstantForTierings(), context); } - if (changes->PortionsToDrop.empty() && !changes->GetPortionsToEvictCount()) { + if (changes->PortionsToRemove.empty() && !changes->GetPortionsToEvictCount()) { return nullptr; } - - if (!context.AllowEviction || !context.AllowDrop) { - changes->NeedRepeat = true; - } return changes; } @@ -610,7 +622,7 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool up Y_VERIFY(it != Granules.end()); auto& spg = it->second; Y_VERIFY(spg); - auto* p = spg->GetPortionPointer(portion); + auto p = spg->GetPortionPtr(portion); if (!p) { LOG_S_WARN("Portion erased already " << portionInfo << " at tablet " << TabletId); diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index 085a819d164..69d46ae5e0f 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -81,6 +81,7 @@ class TColumnEngineForLogs : public IColumnEngine { private: const NColumnShard::TEngineLogsCounters SignalCounters; std::shared_ptr<TGranulesStorage> GranulesStorage; + std::shared_ptr<IStoragesManager> StoragesManager; TEvictionsController EvictionsController; class TTieringProcessContext { public: @@ -114,7 +115,7 @@ public: ADD, }; - TColumnEngineForLogs(ui64 tabletId, const TCompactionLimits& limits = {}); + TColumnEngineForLogs(ui64 tabletId, const TCompactionLimits& limits, const std::shared_ptr<IStoragesManager>& storagesManager); virtual void OnTieringModified(std::shared_ptr<NColumnShard::TTiersManager> manager, const NColumnShard::TTtl& ttl) override; @@ -174,7 +175,7 @@ public: if (it == Granules.end()) { return false; } - return it->second->GetPortionPointer(portionId); + return !!it->second->GetPortionPtr(portionId); } bool IsGranuleExists(const ui64 granuleId) const { diff --git a/ydb/core/tx/columnshard/engines/portions/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/engines/portions/CMakeLists.darwin-x86_64.txt index b8559cce9c0..2c3e14a9a4e 100644 --- a/ydb/core/tx/columnshard/engines/portions/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/portions/CMakeLists.darwin-x86_64.txt @@ -20,6 +20,7 @@ target_link_libraries(columnshard-engines-portions PUBLIC columnshard-engines-scheme tx-columnshard-splitter tx-columnshard-common + ydb-core-tablet_flat tools-enum_parser-enum_serialization_runtime ) target_sources(columnshard-engines-portions PRIVATE diff --git a/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-aarch64.txt index 679d414a1e4..be9cb086451 100644 --- a/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-aarch64.txt @@ -21,6 +21,7 @@ target_link_libraries(columnshard-engines-portions PUBLIC columnshard-engines-scheme tx-columnshard-splitter tx-columnshard-common + ydb-core-tablet_flat tools-enum_parser-enum_serialization_runtime ) target_sources(columnshard-engines-portions PRIVATE diff --git a/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-x86_64.txt index 679d414a1e4..be9cb086451 100644 --- a/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/portions/CMakeLists.linux-x86_64.txt @@ -21,6 +21,7 @@ target_link_libraries(columnshard-engines-portions PUBLIC columnshard-engines-scheme tx-columnshard-splitter tx-columnshard-common + ydb-core-tablet_flat tools-enum_parser-enum_serialization_runtime ) target_sources(columnshard-engines-portions PRIVATE diff --git a/ydb/core/tx/columnshard/engines/portions/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/engines/portions/CMakeLists.windows-x86_64.txt index b8559cce9c0..2c3e14a9a4e 100644 --- a/ydb/core/tx/columnshard/engines/portions/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/portions/CMakeLists.windows-x86_64.txt @@ -20,6 +20,7 @@ target_link_libraries(columnshard-engines-portions PUBLIC columnshard-engines-scheme tx-columnshard-splitter tx-columnshard-common + ydb-core-tablet_flat tools-enum_parser-enum_serialization_runtime ) target_sources(columnshard-engines-portions PRIVATE diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 1c7cd58e18d..115a5bb20f7 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -67,7 +67,7 @@ std::shared_ptr<arrow::Scalar> TPortionInfo::MaxValue(ui32 columnId) const { } TPortionInfo TPortionInfo::CopyWithFilteredColumns(const THashSet<ui32>& columnIds) const { - TPortionInfo result(Granule, Portion, GetMinSnapshot()); + TPortionInfo result(Granule, Portion, GetMinSnapshot(), BlobsOperator); result.Meta = Meta; result.Records.reserve(columnIds.size()); @@ -117,6 +117,9 @@ TString TPortionInfo::DebugString() const { if (RemoveSnapshot.Valid()) { sb << "remove_snapshot:(" << RemoveSnapshot.DebugString() << ");"; } + if (BlobsOperator) { + sb << "blobs_operator:" << BlobsOperator->DebugString() << ";"; + } sb << "chunks:(" << Records.size() << ");"; if (IS_TRACE_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { std::set<TString> blobIds; diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index d7f008e55a8..67743fb931c 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -4,6 +4,7 @@ #include <ydb/core/tx/columnshard/common/snapshot.h> #include <ydb/core/tx/columnshard/engines/scheme/column_features.h> #include <ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> #include <ydb/library/yverify_stream/yverify_stream.h> namespace NKikimr::NOlap { @@ -20,9 +21,28 @@ private: bool HasPkMinMax() const; TPortionMeta Meta; + std::shared_ptr<NOlap::IBlobsStorageOperator> BlobsOperator; public: + bool HasStorageOperator() const { + return !!BlobsOperator; + } + + void InitOperator(const std::shared_ptr<NOlap::IBlobsStorageOperator>& bOperator, const bool rewrite) { + if (rewrite) { + AFL_VERIFY(!!BlobsOperator); + } else { + AFL_VERIFY(!BlobsOperator); + } + AFL_VERIFY(!!bOperator); + BlobsOperator = bOperator; + } + static constexpr const ui32 BLOB_BYTES_LIMIT = 8 * 1024 * 1024; + const std::shared_ptr<NOlap::IBlobsStorageOperator>& GetBlobsStorage() const { + Y_VERIFY(BlobsOperator); + return BlobsOperator; + } std::vector<const TColumnRecord*> GetColumnChunksPointers(const ui32 columnId) const; TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { @@ -35,6 +55,7 @@ public: void ResetMeta() { Meta = TPortionMeta(); + BlobsOperator = nullptr; } const TPortionMeta& GetMeta() const { @@ -78,10 +99,11 @@ public: return TPortionInfo(); } - TPortionInfo(const ui64 granuleId, const ui64 portionId, const TSnapshot& minSnapshot) + TPortionInfo(const ui64 granuleId, const ui64 portionId, const TSnapshot& minSnapshot, const std::shared_ptr<NOlap::IBlobsStorageOperator>& blobsOperator) : Granule(granuleId) , Portion(portionId) , MinSnapshot(minSnapshot) + , BlobsOperator(blobsOperator) { } @@ -104,13 +126,6 @@ public: || Meta.GetProduced() == TPortionMeta::EProduced::SPLIT_COMPACTED; } - bool EvictReady(size_t hotSize) const { - return Meta.GetProduced() == TPortionMeta::EProduced::COMPACTED - || Meta.GetProduced() == TPortionMeta::EProduced::SPLIT_COMPACTED - || Meta.GetProduced() == TPortionMeta::EProduced::EVICTED - || (Meta.GetProduced() == TPortionMeta::EProduced::INSERTED && BlobsSizes().first >= hotSize); - } - ui64 GetPortion() const { return Portion; } diff --git a/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp index 3e3b45b18ce..1f05635ab24 100644 --- a/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp @@ -117,9 +117,9 @@ std::vector<NKikimr::NOlap::TPortionInfoWithBlobs> TPortionInfoWithBlobs::Restor } NKikimr::NOlap::TPortionInfoWithBlobs TPortionInfoWithBlobs::BuildByBlobs(std::vector<std::vector<IPortionColumnChunk::TPtr>>& chunksByBlobs, - std::shared_ptr<arrow::RecordBatch> batch, const ui64 granule, const TSnapshot& snapshot) + std::shared_ptr<arrow::RecordBatch> batch, const ui64 granule, const TSnapshot& snapshot, const std::shared_ptr<NOlap::IBlobsStorageOperator>& bStorageOperator) { - TPortionInfoWithBlobs result(TPortionInfo(granule, 0, snapshot), batch); + TPortionInfoWithBlobs result(TPortionInfo(granule, 0, snapshot, bStorageOperator), batch); for (auto& blob : chunksByBlobs) { auto blobInfo = result.StartBlob(); for (auto&& chunk : blob) { diff --git a/ydb/core/tx/columnshard/engines/portions/with_blobs.h b/ydb/core/tx/columnshard/engines/portions/with_blobs.h index a2a88c2b40c..c4a68771f82 100644 --- a/ydb/core/tx/columnshard/engines/portions/with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/with_blobs.h @@ -88,7 +88,7 @@ public: } static TPortionInfoWithBlobs BuildByBlobs(std::vector<std::vector<IPortionColumnChunk::TPtr>>& chunksByBlobs, std::shared_ptr<arrow::RecordBatch> batch, - const ui64 granule, const TSnapshot& snapshot); + const ui64 granule, const TSnapshot& snapshot, const std::shared_ptr<NOlap::IBlobsStorageOperator>& bStorageOperator); std::optional<TPortionInfoWithBlobs> ChangeSaver(ISnapshotSchema::TPtr currentSchema, const TSaverContext& saverContext) const; diff --git a/ydb/core/tx/columnshard/engines/portions/ya.make b/ydb/core/tx/columnshard/engines/portions/ya.make index 2af9b511fdf..7a6c96a9a8a 100644 --- a/ydb/core/tx/columnshard/engines/portions/ya.make +++ b/ydb/core/tx/columnshard/engines/portions/ya.make @@ -12,6 +12,7 @@ PEERDIR( ydb/core/tx/columnshard/engines/scheme ydb/core/tx/columnshard/splitter ydb/core/tx/columnshard/common + ydb/core/tablet_flat ) GENERATE_ENUM_SERIALIZATION(portion_info.h) diff --git a/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp b/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp index a3c005fd2a5..736dbcf5be7 100644 --- a/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp +++ b/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp @@ -3,52 +3,8 @@ namespace NKikimr::NColumnShard { -bool IDataTasksProcessor::ITask::DoExecute() { - if (OwnerOperator && OwnerOperator->IsStopped()) { - return true; - } else { - DataProcessed = true; - return DoExecuteImpl(); - } -} - bool IDataTasksProcessor::ITask::Apply(NOlap::IDataReader& indexedDataRead) const { - if (OwnerOperator) { - OwnerOperator->ReplyReceived(); - if (OwnerOperator->IsStopped()) { - return true; - } - } return DoApply(indexedDataRead); } -TDataTasksProcessorContainer IDataTasksProcessor::ITask::GetTasksProcessorContainer() const { - return TDataTasksProcessorContainer(OwnerOperator); -} - -bool IDataTasksProcessor::ITask::IsSameProcessor(const TDataTasksProcessorContainer& receivedProcessor) const { - return receivedProcessor.IsSameProcessor(GetTasksProcessorContainer()); -} - -bool IDataTasksProcessor::Add(ITask::TPtr task) { - if (IsStopped()) { - return false; - } - if (DoAdd(task)) { - DataProcessorAddDataCounter.Inc(); - return true; - } - return false; -} - - -void TDataTasksProcessorContainer::Add(NOlap::IDataReader& reader, IDataTasksProcessor::ITask::TPtr task) { - if (Object) { - Object->Add(task); - } else { - task->Execute(nullptr); - task->Apply(reader); - } -} - } diff --git a/ydb/core/tx/columnshard/engines/reader/conveyor_task.h b/ydb/core/tx/columnshard/engines/reader/conveyor_task.h index c1fba77d407..ef535257cfa 100644 --- a/ydb/core/tx/columnshard/engines/reader/conveyor_task.h +++ b/ydb/core/tx/columnshard/engines/reader/conveyor_task.h @@ -8,98 +8,23 @@ class IDataReader; namespace NKikimr::NColumnShard { -class TDataTasksProcessorContainer; - class IDataTasksProcessor { -private: - TAtomicCounter DataProcessorAddDataCounter = 0; - void ReplyReceived() { - Y_VERIFY(DataProcessorAddDataCounter.Dec() >= 0); - } public: class ITask: public NConveyor::ITask { private: - std::shared_ptr<IDataTasksProcessor> OwnerOperator; - bool DataProcessed = false; + using TBase = NConveyor::ITask; protected: - TDataTasksProcessorContainer GetTasksProcessorContainer() const; virtual bool DoApply(NOlap::IDataReader& indexedDataRead) const = 0; - virtual bool DoExecuteImpl() = 0; - - virtual bool DoExecute() override final; public: - ITask(std::shared_ptr<IDataTasksProcessor> ownerOperator) - : OwnerOperator(ownerOperator) { + ITask(const std::optional<NActors::TActorId> ownerId = {}) + : TBase(ownerId) { } - bool IsSameProcessor(const TDataTasksProcessorContainer& receivedProcessor) const; - using TPtr = std::shared_ptr<ITask>; virtual ~ITask() = default; bool Apply(NOlap::IDataReader& indexedDataRead) const; - - bool IsDataProcessed() const noexcept { - return DataProcessed; - } }; -protected: - virtual bool DoAdd(ITask::TPtr task) = 0; - std::atomic<bool> Stopped = false; -public: - i64 GetDataCounter() const { - return DataProcessorAddDataCounter.Val(); - } - - void Stop() { - Stopped = true; - } - bool IsStopped() const { - return Stopped; - } - bool InWaiting() const { - return !IsStopped() && DataProcessorAddDataCounter.Val(); - } - - using TPtr = std::shared_ptr<IDataTasksProcessor>; - virtual ~IDataTasksProcessor() = default; - bool Add(ITask::TPtr task); -}; - -class TDataTasksProcessorContainer { -private: - IDataTasksProcessor::TPtr Object; -public: - TDataTasksProcessorContainer() = default; - TDataTasksProcessorContainer(IDataTasksProcessor::TPtr object) - : Object(object) - { - - } - - bool IsSameProcessor(const TDataTasksProcessorContainer& container) const { - return (ui64)Object.get() == (ui64)container.Object.get(); - } - - void Stop() { - if (Object) { - Object->Stop(); - } - } - - bool InWaiting() const { - return Object && Object->InWaiting(); - } - - bool IsStopped() const { - return Object && Object->IsStopped(); - } - - IDataTasksProcessor::TPtr GetObject() const noexcept { - return Object; - } - - void Add(NOlap::IDataReader& reader, IDataTasksProcessor::ITask::TPtr task); }; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.darwin-x86_64.txt index 5d1bd47355b..10911162af6 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.darwin-x86_64.txt @@ -12,6 +12,7 @@ target_link_libraries(engines-reader-plain_reader PUBLIC contrib-libs-cxxsupp yutil core-formats-arrow + tx-columnshard-blobs_action ) target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp @@ -22,5 +23,6 @@ target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp ) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-aarch64.txt index ef7f01c7669..52f5d3db88a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-aarch64.txt @@ -13,6 +13,7 @@ target_link_libraries(engines-reader-plain_reader PUBLIC contrib-libs-cxxsupp yutil core-formats-arrow + tx-columnshard-blobs_action ) target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp @@ -23,5 +24,6 @@ target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp ) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-x86_64.txt index ef7f01c7669..52f5d3db88a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.linux-x86_64.txt @@ -13,6 +13,7 @@ target_link_libraries(engines-reader-plain_reader PUBLIC contrib-libs-cxxsupp yutil core-formats-arrow + tx-columnshard-blobs_action ) target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp @@ -23,5 +24,6 @@ target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp ) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.windows-x86_64.txt index 5d1bd47355b..10911162af6 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/CMakeLists.windows-x86_64.txt @@ -12,6 +12,7 @@ target_link_libraries(engines-reader-plain_reader PUBLIC contrib-libs-cxxsupp yutil core-formats-arrow + tx-columnshard-blobs_action ) target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp @@ -22,5 +23,6 @@ target_sources(engines-reader-plain_reader PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp ) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp index baba224ae90..d079c3bf2b7 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp @@ -3,7 +3,7 @@ namespace NKikimr::NOlap::NPlainReader { -bool TAssembleBatch::DoExecuteImpl() { +bool TAssembleBatch::DoExecute() { /// @warning The replace logic is correct only in assumption that predicate is applied over a part of ReplaceKey. /// It's not OK to apply predicate before replacing key duplicates otherwise. /// Assumption: dup(A, B) <=> PK(A) = PK(B) => Predicate(A) = Predicate(B) => all or no dups for PK(A) here @@ -26,10 +26,9 @@ bool TAssembleFFBatch::DoApply(IDataReader& owner) const { return true; } -TAssembleBatch::TAssembleBatch(TPortionInfo::TPreparedBatchData&& batchConstructor, - const ui32 sourceIdx, const std::shared_ptr<NArrow::TColumnFilter>& filter, - const NColumnShard::IDataTasksProcessor::TPtr& processor) - : TBase(processor) +TAssembleBatch::TAssembleBatch(const NActors::TActorId& scanActorId, TPortionInfo::TPreparedBatchData&& batchConstructor, + const ui32 sourceIdx, const std::shared_ptr<NArrow::TColumnFilter>& filter) + : TBase(scanActorId) , BatchConstructor(batchConstructor) , Filter(filter) , SourceIdx(sourceIdx) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h index 31d0f2b60a7..3a8262b2ceb 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h @@ -15,14 +15,14 @@ private: protected: std::shared_ptr<arrow::RecordBatch> Result; const ui32 SourceIdx; - virtual bool DoExecuteImpl() override; + virtual bool DoExecute() override; public: virtual TString GetTaskClassIdentifier() const override { return "PlainReader::TAssembleBatch"; } - TAssembleBatch(TPortionInfo::TPreparedBatchData&& batchConstructor, - const ui32 sourceIdx, const std::shared_ptr<NArrow::TColumnFilter>& filter, const NColumnShard::IDataTasksProcessor::TPtr& processor); + TAssembleBatch(const NActors::TActorId& scanActorId, TPortionInfo::TPreparedBatchData&& batchConstructor, + const ui32 sourceIdx, const std::shared_ptr<NArrow::TColumnFilter>& filter); }; class TAssembleFFBatch: public TAssembleBatch { diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp new file mode 100644 index 00000000000..fb72af0d520 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.cpp @@ -0,0 +1,34 @@ +#include "committed_assembler.h" +#include "plain_read_data.h" + +namespace NKikimr::NOlap::NPlainReader { + +bool TCommittedAssembler::DoExecute() { + ResultBatch = NArrow::DeserializeBatch(BlobData, ReadMetadata->GetBlobSchema(SchemaSnapshot)); + Y_VERIFY(ResultBatch); + ResultBatch = ReadMetadata->GetIndexInfo().AddSpecialColumns(ResultBatch, DataSnapshot); + Y_VERIFY(ResultBatch); + ReadMetadata->GetPKRangesFilter().BuildFilter(ResultBatch).Apply(ResultBatch); + EarlyFilter = ReadMetadata->GetProgram().BuildEarlyFilter(ResultBatch); + return true; +} + +bool TCommittedAssembler::DoApply(IDataReader& owner) const { + auto& source = owner.GetMeAs<TPlainReadData>().GetSourceByIdxVerified(SourceIdx); + source.InitFilterStageData(nullptr, EarlyFilter, NArrow::ExtractColumnsValidate(ResultBatch, source.GetFetchingPlan().GetFilterStage()->GetColumnNamesVector())); + source.InitFetchStageData(NArrow::ExtractColumnsValidate(ResultBatch, source.GetFetchingPlan().GetFetchingStage()->GetColumnNamesVector())); + return true; +} + +TCommittedAssembler::TCommittedAssembler(const NActors::TActorId& scanActorId, const TString& blobData, const TReadMetadata::TConstPtr& readMetadata, const ui32 sourceIdx, + const TCommittedBlob& cBlob) + : TBase(scanActorId) + , BlobData(blobData) + , ReadMetadata(readMetadata) + , SourceIdx(sourceIdx) + , SchemaSnapshot(cBlob.GetSchemaSnapshot()) + , DataSnapshot(cBlob.GetSnapshot()) +{ +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.h new file mode 100644 index 00000000000..2dc0fdff17a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/committed_assembler.h @@ -0,0 +1,32 @@ +#pragma once +#include "source.h" +#include <ydb/core/tx/columnshard/engines/reader/conveyor_task.h> +#include <ydb/core/tx/columnshard/engines/reader/read_metadata.h> +#include <ydb/core/tx/columnshard/engines/portions/portion_info.h> +#include <ydb/core/tx/columnshard/counters/common/object_counter.h> +#include <ydb/core/formats/arrow/arrow_filter.h> + +namespace NKikimr::NOlap::NPlainReader { +class TCommittedAssembler: public NColumnShard::IDataTasksProcessor::ITask, public NColumnShard::TMonitoringObjectsCounter<TCommittedAssembler, true> { +private: + using TBase = NColumnShard::IDataTasksProcessor::ITask; + TString BlobData; + TReadMetadata::TConstPtr ReadMetadata; + const ui32 SourceIdx; + TSnapshot SchemaSnapshot; + TSnapshot DataSnapshot; + + std::shared_ptr<NArrow::TColumnFilter> EarlyFilter; + std::shared_ptr<arrow::RecordBatch> ResultBatch; +protected: + virtual bool DoExecute() override; + virtual bool DoApply(IDataReader& owner) const override; +public: + virtual TString GetTaskClassIdentifier() const override { + return "PlainReader::TCommittedAssembler"; + } + + TCommittedAssembler(const NActors::TActorId& scanActorId, const TString& blobData, const TReadMetadata::TConstPtr& readMetadata, const ui32 sourceIdx, + const TCommittedBlob& cBlob); +}; +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp index ddfa7c12beb..52b6d728d8d 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp @@ -1,14 +1,25 @@ #include "constructor.h" #include "filter_assembler.h" #include "column_assembler.h" +#include "committed_assembler.h" #include <ydb/core/tx/columnshard/engines/reader/read_context.h> #include <ydb/core/tx/columnshard/engines/scheme/filtered_scheme.h> +#include <ydb/core/tx/conveyor/usage/events.h> +#include <ydb/core/tx/conveyor/usage/service.h> namespace NKikimr::NOlap::NPlainReader { -TPortionInfo::TPreparedBatchData TAssembleColumnsTaskConstructor::BuildBatchAssembler(IDataReader& reader) { - auto blobSchema = reader.GetReadMetadata()->GetLoadSchema(PortionInfo->GetMinSnapshot()); - auto readSchema = reader.GetReadMetadata()->GetLoadSchema(reader.GetReadMetadata()->GetSnapshot()); +TPortionInfo::TPreparedBatchData TAssembleColumnsTaskConstructor::BuildBatchAssembler() { + auto blobs = ExtractBlobsData(); + THashMap<TBlobRange, TPortionInfo::TAssembleBlobInfo> blobsDataAssemble; + for (auto&& i : blobs) { + blobsDataAssemble.emplace(i.first, i.second); + } + for (auto&& i : NullBlocks) { + AFL_VERIFY(blobsDataAssemble.emplace(i.first, i.second).second); + } + auto blobSchema = ReadMetadata->GetLoadSchema(PortionInfo->GetMinSnapshot()); + auto readSchema = ReadMetadata->GetLoadSchema(ReadMetadata->GetSnapshot()); ISnapshotSchema::TPtr resultSchema; if (ColumnIds.size()) { resultSchema = std::make_shared<TFilteredSnapshotSchema>(readSchema, ColumnIds); @@ -16,17 +27,30 @@ TPortionInfo::TPreparedBatchData TAssembleColumnsTaskConstructor::BuildBatchAsse resultSchema = readSchema; } - return PortionInfo->PrepareForAssemble(*blobSchema, *resultSchema, Data); + return PortionInfo->PrepareForAssemble(*blobSchema, *resultSchema, blobs); +} + +void TEFTaskConstructor::DoOnDataReady() { + NConveyor::TScanServiceOperator::SendTaskToExecute(std::make_shared<TAssembleFilter>(ScanActorId, BuildBatchAssembler(), + ReadMetadata, SourceIdx, ColumnIds, UseEarlyFilter)); +} + +void TFFColumnsTaskConstructor::DoOnDataReady() { + NConveyor::TScanServiceOperator::SendTaskToExecute(std::make_shared<TAssembleFFBatch>(ScanActorId, BuildBatchAssembler(), + SourceIdx, AppliedFilter)); } -void TEFTaskConstructor::DoOnDataReady(IDataReader& reader) { - reader.GetContext().MutableProcessor().Add(reader, std::make_shared<TAssembleFilter>(BuildBatchAssembler(reader), - reader.GetReadMetadata(), SourceIdx, ColumnIds, reader.GetContext().GetProcessor().GetObject(), UseEarlyFilter)); +void TCommittedColumnsTaskConstructor::DoOnDataReady() { + auto blobs = ExtractBlobsData(); + Y_VERIFY(NullBlocks.size() == 0); + Y_VERIFY(blobs.size() == 1); + NConveyor::TScanServiceOperator::SendTaskToExecute(std::make_shared<TCommittedAssembler>(ScanActorId, blobs.begin()->second, + ReadMetadata, SourceIdx, CommittedBlob)); } -void TFFColumnsTaskConstructor::DoOnDataReady(IDataReader& reader) { - reader.GetContext().MutableProcessor().Add(reader, std::make_shared<TAssembleFFBatch>(BuildBatchAssembler(reader), - SourceIdx, AppliedFilter, reader.GetContext().GetProcessor().GetObject())); +bool IFetchTaskConstructor::DoOnError(const TBlobRange& range) { + NActors::TActorContext::AsActorContext().Send(ScanActorId, std::make_unique<NConveyor::TEvExecution::TEvTaskProcessedResult>(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); + return false; } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h index 71c892777f9..58925f1a48e 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h @@ -1,61 +1,49 @@ #pragma once +#include <ydb/core/tx/columnshard/engines/reader/read_metadata.h> +#include <ydb/core/tx/columnshard/engines/reader/read_context.h> #include <ydb/core/tx/columnshard/engines/portions/column_record.h> +#include <ydb/core/tx/columnshard/blobs_reader/task.h> #include <ydb/core/tx/columnshard/blob.h> #include "source.h" namespace NKikimr::NOlap::NPlainReader { -class IFetchTaskConstructor { +class IFetchTaskConstructor: public NBlobOperations::NRead::ITask { private: - bool Constructed = false; - IDataReader& Reader; - bool Started = false; + using TBase = NBlobOperations::NRead::ITask; protected: - THashSet<TBlobRange> WaitingData; - THashMap<TBlobRange, TPortionInfo::TAssembleBlobInfo> Data; - virtual void DoOnDataReady(IDataReader& reader) = 0; - - void OnDataReady(IDataReader& reader) { - if (WaitingData.empty()) { - Constructed = true; - return DoOnDataReady(reader); - } - } + NActors::TActorId ScanActorId; + const ui32 SourceIdx; + std::shared_ptr<const TReadMetadata> ReadMetadata; + TReadContext Context; + THashMap<TBlobRange, ui32> NullBlocks; + virtual bool DoOnError(const TBlobRange& range) override; public: - IFetchTaskConstructor(IDataReader& reader) - : Reader(reader) + IFetchTaskConstructor(IDataReader& reader, const std::vector<std::shared_ptr<IBlobsReadingAction>>& readActions, THashMap<TBlobRange, ui32>&& nullBlocks, const IDataSource& source) + : TBase(readActions) + , ScanActorId(NActors::TActorContext::AsActorContext().SelfID) + , SourceIdx(source.GetSourceIdx()) + , ReadMetadata(reader.GetReadMetadata()) + , Context(reader.GetContext()) + , NullBlocks(std::move(nullBlocks)) { } +}; - void StartDataWaiting() { - Started = true; - OnDataReady(Reader); - } - - void Abort() { - Constructed = true; - } - - virtual ~IFetchTaskConstructor() { - Y_VERIFY(Constructed); - } - - void AddWaitingRecord(const TColumnRecord& rec) { - Y_VERIFY(!Started); - Y_VERIFY(WaitingData.emplace(rec.BlobRange).second); - } - - void AddData(const TBlobRange& range, TString&& data) { - Y_VERIFY(Started); - Y_VERIFY(WaitingData.erase(range)); - Y_VERIFY(Data.emplace(range, std::move(data)).second); - OnDataReady(Reader); - } +class TCommittedColumnsTaskConstructor: public IFetchTaskConstructor { +private: + TCommittedBlob CommittedBlob; + using TBase = IFetchTaskConstructor; +protected: + virtual void DoOnDataReady() override; +public: + TCommittedColumnsTaskConstructor(IDataReader& reader, const std::vector<std::shared_ptr<IBlobsReadingAction>>& readActions, THashMap<TBlobRange, ui32>&& nullBlocks, + const TCommittedDataSource& source) + : TBase(reader, readActions, std::move(nullBlocks), source) + , CommittedBlob(source.GetCommitted()) + { - void AddNullData(const TBlobRange& range, const ui32 rowsCount) { - Y_VERIFY(!Started); - Y_VERIFY(Data.emplace(range, rowsCount).second); } }; @@ -64,14 +52,13 @@ private: using TBase = IFetchTaskConstructor; protected: std::set<ui32> ColumnIds; - const ui32 SourceIdx; std::shared_ptr<TPortionInfo> PortionInfo; - TPortionInfo::TPreparedBatchData BuildBatchAssembler(IDataReader& reader); + TPortionInfo::TPreparedBatchData BuildBatchAssembler(); public: - TAssembleColumnsTaskConstructor(const std::set<ui32>& columnIds, const TPortionDataSource& portion, IDataReader& reader) - : TBase(reader) + TAssembleColumnsTaskConstructor(IDataReader& reader, const std::vector<std::shared_ptr<IBlobsReadingAction>>& readActions, THashMap<TBlobRange, ui32>&& nullBlocks, + const std::set<ui32>& columnIds, const TPortionDataSource& portion) + : TBase(reader, readActions, std::move(nullBlocks), portion) , ColumnIds(columnIds) - , SourceIdx(portion.GetSourceIdx()) , PortionInfo(portion.GetPortionInfoPtr()) { @@ -82,10 +69,11 @@ class TFFColumnsTaskConstructor: public TAssembleColumnsTaskConstructor { private: using TBase = TAssembleColumnsTaskConstructor; std::shared_ptr<NArrow::TColumnFilter> AppliedFilter; - virtual void DoOnDataReady(IDataReader& reader) override; + virtual void DoOnDataReady() override; public: - TFFColumnsTaskConstructor(const std::set<ui32>& columnIds, const TPortionDataSource& portion, IDataReader& reader) - : TBase(columnIds, portion, reader) + TFFColumnsTaskConstructor(IDataReader& reader, const std::vector<std::shared_ptr<IBlobsReadingAction>>& readActions, THashMap<TBlobRange, ui32>&& nullBlocks, + const std::set<ui32>& columnIds, const TPortionDataSource& portion) + : TBase(reader, readActions, std::move(nullBlocks), columnIds, portion) , AppliedFilter(portion.GetFilterStageData().GetAppliedFilter()) { } @@ -95,10 +83,11 @@ class TEFTaskConstructor: public TAssembleColumnsTaskConstructor { private: bool UseEarlyFilter = false; using TBase = TAssembleColumnsTaskConstructor; - virtual void DoOnDataReady(IDataReader& reader) override; + virtual void DoOnDataReady() override; public: - TEFTaskConstructor(const std::set<ui32>& columnIds, const TPortionDataSource& portion, IDataReader& reader, const bool useEarlyFilter) - : TBase(columnIds, portion, reader) + TEFTaskConstructor(IDataReader& reader, const std::vector<std::shared_ptr<IBlobsReadingAction>>& readActions, THashMap<TBlobRange, ui32>&& nullBlocks, + const std::set<ui32>& columnIds, const TPortionDataSource& portion, const bool useEarlyFilter) + : TBase(reader, readActions, std::move(nullBlocks), columnIds, portion) , UseEarlyFilter(useEarlyFilter) { } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp index 2a37c87504e..326fa444399 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.cpp @@ -5,7 +5,7 @@ namespace NKikimr::NOlap::NPlainReader { -bool TAssembleFilter::DoExecuteImpl() { +bool TAssembleFilter::DoExecute() { /// @warning The replace logic is correct only in assumption that predicate is applied over a part of ReplaceKey. /// It's not OK to apply predicate before replacing key duplicates otherwise. /// Assumption: dup(A, B) <=> PK(A) = PK(B) => Predicate(A) = Predicate(B) => all or no dups for PK(A) here @@ -51,6 +51,7 @@ bool TAssembleFilter::DoExecuteImpl() { } bool TAssembleFilter::DoApply(IDataReader& owner) const { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "apply"); owner.GetMeAs<TPlainReadData>().GetSourceByIdxVerified(SourceIdx).InitFilterStageData(AppliedFilter, EarlyFilter, FilteredBatch); return true; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.h index afb549cacb8..30ece244114 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/filter_assembler.h @@ -23,16 +23,16 @@ namespace NKikimr::NOlap::NPlainReader { const bool UseFilter = true; protected: virtual bool DoApply(IDataReader& owner) const override; - virtual bool DoExecuteImpl() override; + virtual bool DoExecute() override; public: virtual TString GetTaskClassIdentifier() const override { return "PlainReading::TAssembleFilter"; } - TAssembleFilter(TPortionInfo::TPreparedBatchData&& batchConstructor, NOlap::TReadMetadata::TConstPtr readMetadata, - const ui32 sourceIdx, const std::set<ui32>& filterColumnIds, NColumnShard::IDataTasksProcessor::TPtr processor, const bool useFilter) - : TBase(processor) + TAssembleFilter(const NActors::TActorId& scanActorId, TPortionInfo::TPreparedBatchData&& batchConstructor, NOlap::TReadMetadata::TConstPtr readMetadata, + const ui32 sourceIdx, const std::set<ui32>& filterColumnIds, const bool useFilter) + : TBase(scanActorId) , BatchConstructor(batchConstructor) , SourceIdx(sourceIdx) , ReadMetadata(readMetadata) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp index 21f0d065a3e..a5f3ff41468 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp @@ -79,25 +79,29 @@ std::vector<NKikimr::NOlap::TPartialReadResult> TPlainReadData::DoExtractReadyRe return result; } -void TPlainReadData::DoAddData(const TBlobRange& blobRange, const TString& data) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoAddData")("range", blobRange); - auto it = Sources.find(blobRange); - Y_VERIFY(it != Sources.end()); - TString dataForMove = data; - it->second->AddData(blobRange, std::move(dataForMove)); - Sources.erase(it); -} - -std::optional<NKikimr::NOlap::TBlobRange> TPlainReadData::DoExtractNextBlob(const bool /*hasReadyResults*/) { - while (Queue.empty() && Scanner->BuildNextInterval()) { +std::shared_ptr<NBlobOperations::NRead::ITask> TPlainReadData::DoExtractNextReadTask(const bool /*hasReadyResults*/) { + while (PriorityQueue.empty() && Queue.empty() && Scanner->BuildNextInterval()) { } - auto blobRange = Queue.pop_front(); - if (blobRange) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoExtractNextBlob")("range", *blobRange); - } else { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoExtractNextBlob")("range", "nothing"); + { + auto task = PriorityQueue.pop_front(); + if (task) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoExtractNextBlob")("task", (*task)->DebugString()); + return *task; + } else { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoExtractNextBlob")("task", "nothing"); + } + } + + { + auto task = Queue.pop_front(); + if (task) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoExtractNextBlob")("task", (*task)->DebugString()); + return *task; + } else { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "DoExtractNextBlob")("task", "nothing"); + } } - return blobRange; + return nullptr; } void TPlainReadData::OnIntervalResult(std::shared_ptr<arrow::RecordBatch> batch) { diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h index 5a50f4c8fd9..4d3c5c7ef98 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h @@ -25,7 +25,7 @@ private: std::vector<TPartialReadResult> PartialResults; ui32 ReadyResultsCount = 0; TFetchBlobsQueue Queue; - THashMap<TBlobRange, std::shared_ptr<IDataSource>> Sources; + TFetchBlobsQueue PriorityQueue; bool AbortedFlag = false; protected: virtual TString DoDebugString() const override { @@ -48,8 +48,7 @@ protected: return (Scanner->IsFinished() && PartialResults.empty()); } - virtual void DoAddData(const TBlobRange& blobRange, const TString& data) override; - virtual std::optional<TBlobRange> DoExtractNextBlob(const bool hasReadyResults) override; + virtual std::shared_ptr<NBlobOperations::NRead::ITask> DoExtractNextReadTask(const bool hasReadyResults) override; public: TFetchingPlan GetColumnsFetchingPlan(const bool exclusiveSource) const; @@ -57,9 +56,12 @@ public: return *Scanner->GetSourceVerified(sourceIdx); } - void AddBlobForFetch(const ui64 objectId, const TBlobRange& bRange) { - Queue.emplace_back(objectId, bRange); - Y_VERIFY(Sources.emplace(bRange, Scanner->GetSourceVerified(objectId)).second); + void AddForFetch(const ui64 objectId, const std::shared_ptr<NBlobOperations::NRead::ITask>& readTask, const bool priority) { + if (priority) { + PriorityQueue.emplace_back(objectId, readTask); + } else { + Queue.emplace_back(objectId, readTask); + } } void OnIntervalResult(std::shared_ptr<arrow::RecordBatch> batch); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp index ea035e3cc07..ba2e970fe10 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp @@ -56,7 +56,9 @@ bool IDataSource::OnIntervalFinished(const ui32 intervalIdx) { return Intervals.empty(); } -void TPortionDataSource::NeedFetchColumns(const std::set<ui32>& columnIds, std::shared_ptr<IFetchTaskConstructor> constructor, const std::shared_ptr<NArrow::TColumnFilter>& filter) { +void TPortionDataSource::NeedFetchColumns(const std::set<ui32>& columnIds, + const std::shared_ptr<IBlobsReadingAction>& readingAction, THashMap<TBlobRange, ui32>& nullBlocks, + const std::shared_ptr<NArrow::TColumnFilter>& filter) { const NArrow::TColumnFilter& cFilter = filter ? *filter : NArrow::TColumnFilter::BuildAllowFilter(); for (auto&& i : columnIds) { auto columnChunks = Portion->GetColumnChunksPointers(i); @@ -68,52 +70,51 @@ void TPortionDataSource::NeedFetchColumns(const std::set<ui32>& columnIds, std:: for (auto&& c : columnChunks) { Y_VERIFY(!itFinished); if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRowsVerified())) { - constructor->AddWaitingRecord(*c); - Y_VERIFY(BlobsWaiting.emplace(c->BlobRange, constructor).second); - ReadData.AddBlobForFetch(GetSourceIdx(), c->BlobRange); + readingAction->AddRange(c->BlobRange); } else { - constructor->AddNullData(c->BlobRange, c->GetMeta().GetNumRowsVerified()); + nullBlocks.emplace(c->BlobRange, c->GetMeta().GetNumRowsVerified()); } itFinished = !itFilter.Next(c->GetMeta().GetNumRowsVerified()); } AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows(i)); } - constructor->StartDataWaiting(); } void TPortionDataSource::DoStartFilterStage() { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoFetchEF"); Y_VERIFY(FetchingPlan->GetFilterStage()->GetSize()); auto& columnIds = FetchingPlan->GetFilterStage()->GetColumnIds(); - NeedFetchColumns(columnIds, std::make_shared<TEFTaskConstructor>(columnIds, *this, ReadData, FetchingPlan->CanUseEarlyFilterImmediately()), nullptr); + + auto readAction = Portion->GetBlobsStorage()->StartReadingAction(); + THashMap<TBlobRange, ui32> nullBlocks; + NeedFetchColumns(columnIds, readAction, nullBlocks, nullptr); + + std::vector<std::shared_ptr<IBlobsReadingAction>> actions = {readAction}; + auto constructor = std::make_shared<TEFTaskConstructor>(ReadData, actions, std::move(nullBlocks), columnIds, *this, FetchingPlan->CanUseEarlyFilterImmediately()); + ReadData.AddForFetch(GetSourceIdx(), constructor, false); } void TPortionDataSource::DoStartFetchStage() { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoStartFetchStage"); Y_VERIFY(!FetchStageData); Y_VERIFY(FilterStageData); - if (!FetchingPlan->GetFetchingStage()->GetSize()) { - InitFetchStageData(nullptr); - } else if (!FilterStageData->IsEmptyFilter()) { + if (FetchingPlan->GetFetchingStage()->GetSize() && !FilterStageData->IsEmptyFilter()) { auto& columnIds = FetchingPlan->GetFetchingStage()->GetColumnIds(); - NeedFetchColumns(columnIds, std::make_shared<TFFColumnsTaskConstructor>(columnIds, *this, ReadData), - GetFilterStageData().GetActualFilter()); - } else { - InitFetchStageData(nullptr); - } -} -void TPortionDataSource::AddData(const TBlobRange& range, TString&& data) { - auto it = BlobsWaiting.find(range); - Y_VERIFY(it != BlobsWaiting.end()); - it->second->AddData(range, std::move(data)); - BlobsWaiting.erase(it); + auto readAction = Portion->GetBlobsStorage()->StartReadingAction(); + THashMap<TBlobRange, ui32> nullBlocks; + NeedFetchColumns(columnIds, readAction, nullBlocks, GetFilterStageData().GetActualFilter()); + if (readAction->GetExpectedBlobsCount()) { + std::vector<std::shared_ptr<IBlobsReadingAction>> actions = {readAction}; + auto constructor = std::make_shared<TFFColumnsTaskConstructor>(ReadData, actions, std::move(nullBlocks), columnIds, *this); + ReadData.AddForFetch(GetSourceIdx(), constructor, true); + return; + } + } + InitFetchStageData(nullptr); } void TPortionDataSource::DoAbort() { - for (auto&& i : BlobsWaiting) { - i.second->Abort(); - } } void TCommittedDataSource::DoFetch() { @@ -121,20 +122,16 @@ void TCommittedDataSource::DoFetch() { if (!ReadStarted) { Y_VERIFY(!ResultReady); ReadStarted = true; - ReadData.AddBlobForFetch(GetSourceIdx(), CommittedBlob.GetBlobRange()); - } -} -void TCommittedDataSource::AddData(const TBlobRange& /*range*/, TString&& data) { - Y_VERIFY(!ResultReady); - ResultReady = true; - auto resultBatch = NArrow::DeserializeBatch(data, ReadData.GetReadMetadata()->GetBlobSchema(CommittedBlob.GetSchemaSnapshot())); - Y_VERIFY(resultBatch); - resultBatch = ReadData.GetReadMetadata()->GetIndexInfo().AddSpecialColumns(resultBatch, CommittedBlob.GetSnapshot()); - Y_VERIFY(resultBatch); - ReadData.GetReadMetadata()->GetPKRangesFilter().BuildFilter(resultBatch).Apply(resultBatch); - InitFilterStageData(nullptr, ReadData.GetReadMetadata()->GetProgram().BuildEarlyFilter(resultBatch), NArrow::ExtractColumnsValidate(resultBatch, FetchingPlan->GetFilterStage()->GetColumnNamesVector())); - InitFetchStageData(NArrow::ExtractColumnsValidate(resultBatch, FetchingPlan->GetFetchingStage()->GetColumnNamesVector())); + std::shared_ptr<IBlobsStorageOperator> storageOperator = ReadData.GetContext().GetStoragesManager()->GetInsertOperator(); + auto readAction = storageOperator->StartReadingAction(); + readAction->AddRange(CommittedBlob.GetBlobRange()); + + THashMap<TBlobRange, ui32> nullBlocks; + std::vector<std::shared_ptr<IBlobsReadingAction>> actions = {readAction}; + auto constructor = std::make_shared<TCommittedColumnsTaskConstructor>(ReadData, actions, std::move(nullBlocks), *this); + ReadData.AddForFetch(GetSourceIdx(), constructor, true); + } } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/source.h index edc1275e250..42b804efe94 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/source.h @@ -59,6 +59,11 @@ protected: virtual void DoAbort() = 0; public: + const TFetchingPlan& GetFetchingPlan() const { + Y_VERIFY(FetchingPlan); + return *FetchingPlan; + } + bool IsMergingStarted() const { return MergingStartedFlag; } @@ -123,16 +128,16 @@ public: virtual ~IDataSource() { Y_VERIFY(Intervals.empty()); } - virtual void AddData(const TBlobRange& range, TString&& data) = 0; }; class TPortionDataSource: public IDataSource { private: using TBase = IDataSource; std::shared_ptr<TPortionInfo> Portion; - THashMap<TBlobRange, std::shared_ptr<IFetchTaskConstructor>> BlobsWaiting; - void NeedFetchColumns(const std::set<ui32>& columnIds, std::shared_ptr<IFetchTaskConstructor> constructor, const std::shared_ptr<NArrow::TColumnFilter>& filter); + void NeedFetchColumns(const std::set<ui32>& columnIds, + const std::shared_ptr<IBlobsReadingAction>& readingAction, THashMap<TBlobRange, ui32>& nullBlocks, + const std::shared_ptr<NArrow::TColumnFilter>& filter); virtual void DoStartFilterStage() override; virtual void DoStartFetchStage() override; @@ -159,8 +164,6 @@ public: , Portion(portion) { } - - virtual void AddData(const TBlobRange& range, TString&& data) override; }; class TCommittedDataSource: public IDataSource { @@ -199,8 +202,6 @@ public: , CommittedBlob(committed) { } - - virtual void AddData(const TBlobRange& range, TString&& data) override; }; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make index c5a23130f43..e500dac1ef8 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make @@ -9,11 +9,13 @@ SRCS( plain_read_data.cpp filter_assembler.cpp column_assembler.cpp + committed_assembler.cpp columns_set.cpp ) PEERDIR( ydb/core/formats/arrow + ydb/core/tx/columnshard/blobs_action ) END() diff --git a/ydb/core/tx/columnshard/engines/reader/queue.h b/ydb/core/tx/columnshard/engines/reader/queue.h index c3f0ee8b13f..b12e375ce7f 100644 --- a/ydb/core/tx/columnshard/engines/reader/queue.h +++ b/ydb/core/tx/columnshard/engines/reader/queue.h @@ -1,27 +1,27 @@ #pragma once #include <ydb/library/accessor/accessor.h> #include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/tx/columnshard/blobs_reader/task.h> namespace NKikimr::NOlap { -class TBatchBlobRange { +class TBatchReadTask { private: const ui64 ObjectId; - const TBlobRange Range; + const std::shared_ptr<NBlobOperations::NRead::ITask> ReadTask; public: ui64 GetObjectId() const { return ObjectId; } - const TBlobRange& GetRange() const { - return Range; + const std::shared_ptr<NBlobOperations::NRead::ITask>& GetTask() const { + return ReadTask; } - TBatchBlobRange(const ui64 objectId, const TBlobRange range) + TBatchReadTask(const ui64 objectId, const std::shared_ptr<NBlobOperations::NRead::ITask>& readTask) : ObjectId(objectId) - , Range(range) + , ReadTask(readTask) { - Y_VERIFY(range.BlobId.IsValid()); } }; @@ -59,23 +59,24 @@ public: return &IteratorBlobsSequential.front(); } - std::optional<TBlobRange> pop_front() { + std::optional<std::shared_ptr<NBlobOperations::NRead::ITask>> pop_front() { if (!StoppedFlag && IteratorBlobsSequential.size()) { auto result = IteratorBlobsSequential.front(); IteratorBlobsSequential.pop_front(); - return result.GetRange(); + return result.GetTask(); } else { return {}; } } - void emplace_back(const ui64 objectId, const TBlobRange& range) { + void emplace_back(const ui64 objectId, const std::shared_ptr<NBlobOperations::NRead::ITask>& task) { Y_VERIFY(!StoppedFlag); - IteratorBlobsSequential.emplace_back(objectId, range); + Y_VERIFY(task); + IteratorBlobsSequential.emplace_back(objectId, task); } }; -using TFetchBlobsQueue = TFetchBlobsQueueImpl<TBatchBlobRange>; +using TFetchBlobsQueue = TFetchBlobsQueueImpl<TBatchReadTask>; } diff --git a/ydb/core/tx/columnshard/engines/reader/read_context.cpp b/ydb/core/tx/columnshard/engines/reader/read_context.cpp index a57a2f82a77..f1658ae4440 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_context.cpp +++ b/ydb/core/tx/columnshard/engines/reader/read_context.cpp @@ -4,10 +4,10 @@ namespace NKikimr::NOlap { -TReadContext::TReadContext(const NColumnShard::TDataTasksProcessorContainer& processor, +TReadContext::TReadContext(const std::shared_ptr<IStoragesManager>& storagesManager, const NColumnShard::TConcreteScanCounters& counters, std::shared_ptr<NOlap::TActorBasedMemoryAccesor> memoryAccessor, const bool isInternalRead) - : Processor(processor) + : StoragesManager(storagesManager) , Counters(counters) , MemoryAccessor(memoryAccessor) , IsInternalRead(isInternalRead) diff --git a/ydb/core/tx/columnshard/engines/reader/read_context.h b/ydb/core/tx/columnshard/engines/reader/read_context.h index 360f2050a95..7af57996200 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_context.h +++ b/ydb/core/tx/columnshard/engines/reader/read_context.h @@ -1,6 +1,7 @@ #pragma once #include "conveyor_task.h" #include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> #include <ydb/core/tx/columnshard/columnshard__scan.h> #include <ydb/core/tx/columnshard/counters/scan.h> #include <ydb/core/tx/columnshard/resources/memory.h> @@ -26,22 +27,23 @@ public: class TReadContext { private: - YDB_ACCESSOR_DEF(NColumnShard::TDataTasksProcessorContainer, Processor); + YDB_READONLY_DEF(std::shared_ptr<IStoragesManager>, StoragesManager); const NColumnShard::TConcreteScanCounters Counters; - YDB_READONLY_DEF(std::shared_ptr<NOlap::TActorBasedMemoryAccesor>, MemoryAccessor); + YDB_READONLY_DEF(std::shared_ptr<TActorBasedMemoryAccesor>, MemoryAccessor); YDB_READONLY(bool, IsInternalRead, false); public: const NColumnShard::TConcreteScanCounters& GetCounters() const { return Counters; } - TReadContext(const NColumnShard::TDataTasksProcessorContainer& processor, + TReadContext(const std::shared_ptr<IStoragesManager>& storagesManager, const NColumnShard::TConcreteScanCounters& counters, std::shared_ptr<NOlap::TActorBasedMemoryAccesor> memoryAccessor, const bool isInternalRead ); - TReadContext(const NColumnShard::TConcreteScanCounters& counters, const bool isInternalRead) - : Counters(counters) + TReadContext(const std::shared_ptr<IStoragesManager>& storagesManager, const NColumnShard::TConcreteScanCounters& counters, const bool isInternalRead) + : StoragesManager(storagesManager) + , Counters(counters) , IsInternalRead(isInternalRead) { @@ -52,8 +54,7 @@ class IDataReader { protected: TReadContext Context; std::shared_ptr<const TReadMetadata> ReadMetadata; - virtual void DoAddData(const TBlobRange& blobRange, const TString& data) = 0; - virtual std::optional<TBlobRange> DoExtractNextBlob(const bool hasReadyResults) = 0; + virtual std::shared_ptr<NBlobOperations::NRead::ITask> DoExtractNextReadTask(const bool hasReadyResults) = 0; virtual TString DoDebugString() const = 0; virtual void DoAbort() = 0; virtual bool DoIsFinished() const = 0; @@ -82,10 +83,6 @@ public: return Context.GetCounters(); } - const NColumnShard::TDataTasksProcessorContainer& GetTasksProcessor() const noexcept { - return Context.GetProcessor(); - } - void Abort() { return DoAbort(); } @@ -120,12 +117,8 @@ public: sb << DoDebugString(); return sb; } - - void AddData(const TBlobRange& blobRange, const TString& data) { - DoAddData(blobRange, data); - } - std::optional<TBlobRange> ExtractNextBlob(const bool hasReadyResults) { - return DoExtractNextBlob(hasReadyResults); + std::shared_ptr<NBlobOperations::NRead::ITask> ExtractNextReadTask(const bool hasReadyResults) { + return DoExtractNextReadTask(hasReadyResults); } }; diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h index 4bf3b64841d..5ba8e09235c 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h @@ -98,7 +98,6 @@ public: std::shared_ptr<NOlap::TPredicate> LessPredicate; std::shared_ptr<NOlap::TPredicate> GreaterPredicate; - std::shared_ptr<const THashSet<TUnifiedBlobId>> ExternBlobs; ui64 Limit{0}; // TODO virtual void Dump(IOutputStream& out) const { diff --git a/ydb/core/tx/columnshard/engines/scheme/column_features.h b/ydb/core/tx/columnshard/engines/scheme/column_features.h index d7f5adcefe0..24e07ac7c61 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column_features.h +++ b/ydb/core/tx/columnshard/engines/scheme/column_features.h @@ -3,6 +3,8 @@ #include <ydb/core/formats/arrow/dictionary/object.h> #include <ydb/core/formats/arrow/serializer/abstract.h> #include <ydb/core/formats/arrow/transformer/abstract.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> #include <contrib/libs/apache/arrow/cpp/src/arrow/type.h> #include <contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h> @@ -12,7 +14,16 @@ class TSaverContext { private: TString TierName; std::optional<NArrow::TCompression> ExternalCompression; + YDB_READONLY_DEF(std::shared_ptr<IBlobsStorageOperator>, StorageOperator); + YDB_READONLY_DEF(std::shared_ptr<IStoragesManager>, StoragesManager); public: + TSaverContext(const std::shared_ptr<IBlobsStorageOperator>& storageOperator, const std::shared_ptr<IStoragesManager>& storagesManager) + : StorageOperator(storageOperator) + , StoragesManager(storagesManager) + { + + } + const std::optional<NArrow::TCompression>& GetExternalCompression() const { return ExternalCompression; } diff --git a/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp b/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp index ce84fe4c504..1672d7a97ac 100644 --- a/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp @@ -18,24 +18,4 @@ std::optional<TInstant> TTierInfo::ScalarToInstant(const std::shared_ptr<arrow:: } } -std::shared_ptr<NKikimr::NOlap::TTierInfo> TTiering::GetMainTierInfo() const { - auto ttl = Ttl; - auto tier = OrderedTiers.size() ? OrderedTiers.begin()->GetPtr() : nullptr; - if (!ttl && !tier) { - return nullptr; - } else if (!tier) { - return ttl; - } else if (!ttl) { - return tier; - } else { - const TDuration ttlDuration = ttl->GetEvictDuration(); - const TDuration tierDuration = tier->GetEvictDuration(); - if (tierDuration < ttlDuration) { - return tier; - } else { - return ttl; - } - } -} - } diff --git a/ydb/core/tx/columnshard/engines/scheme/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tier_info.h index 4ec4c3233bf..2838f28cd38 100644 --- a/ydb/core/tx/columnshard/engines/scheme/tier_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/tier_info.h @@ -13,13 +13,12 @@ namespace NKikimr::NOlap { class TTierInfo { private: - TString Name; - TString EvictColumnName; - TDuration EvictDuration; - bool NeedExport = false; + YDB_READONLY_DEF(TString, Name); + YDB_READONLY_DEF(TString, EvictColumnName); + YDB_READONLY_DEF(TDuration, EvictDuration); ui32 TtlUnitsInSecond; - std::optional<NArrow::TCompression> Compression; + YDB_READONLY_DEF(std::optional<NArrow::TCompression>, Compression); public: TTierInfo(const TString& tierName, TDuration evictDuration, const TString& column, ui32 unitsInSecond = 0) : Name(tierName) @@ -31,43 +30,15 @@ public: Y_VERIFY(!!EvictColumnName); } - const TString& GetName() const { - return Name; - } - - const TString& GetEvictColumnName() const { - return EvictColumnName; - } - TInstant GetEvictInstant(const TInstant now) const { return now - EvictDuration; } - TDuration GetEvictDuration() const { - return EvictDuration; - } - - bool GetNeedExport() const { - return NeedExport; - } - - TTierInfo& SetNeedExport(const bool value) { - NeedExport = value; - return *this; - } - TTierInfo& SetCompression(const NArrow::TCompression& value) { Compression = value; return *this; } - const std::optional<NArrow::TCompression> GetCompression() const { - if (NeedExport) { - return {}; - } - return Compression; - } - std::shared_ptr<arrow::Field> GetEvictColumn(const std::shared_ptr<arrow::Schema>& schema) const { return schema->GetFieldByName(EvictColumnName); } @@ -131,8 +102,6 @@ class TTiering { TSet<TTierRef> OrderedTiers; public: - std::shared_ptr<TTierInfo> GetMainTierInfo() const; - std::shared_ptr<TTierInfo> Ttl; const TTiersMap& GetTierByName() const { @@ -164,24 +133,6 @@ public: return {}; } - std::optional<TInstant> ScalarToInstant(const std::shared_ptr<arrow::Scalar>& scalar) const { - auto mainTier = GetMainTierInfo(); - if (!mainTier) { - return {}; - } else { - return mainTier->ScalarToInstant(scalar); - } - } - - std::optional<TInstant> GetEvictInstant(const TInstant now) const { - auto mainTier = GetMainTierInfo(); - if (!mainTier) { - return {}; - } else { - return mainTier->GetEvictInstant(now); - } - } - std::optional<NArrow::TCompression> GetCompression(const TString& name) const { auto it = TierByName.find(name); if (it != TierByName.end()) { @@ -191,15 +142,6 @@ public: return {}; } - bool NeedExport(const TString& name) const { - auto it = TierByName.find(name); - if (it != TierByName.end()) { - Y_VERIFY(!name.empty()); - return it->second->GetNeedExport(); - } - return false; - } - THashSet<TString> GetTtlColumns() const { THashSet<TString> out; if (Ttl) { diff --git a/ydb/core/tx/columnshard/engines/storage/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule.cpp index 0bb0a5dbb39..1a13559ac9e 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule.cpp @@ -63,6 +63,9 @@ void TGranuleMeta::AddColumnRecord(const TIndexInfo& indexInfo, const TPortionIn Y_VERIFY(it->second->IsEqualWithSnapshots(portion)); it->second->AddRecord(indexInfo, rec, portionMeta); } + if (portionMeta) { + it->second->InitOperator(Owner->GetStoragesManager()->InitializePortionOperator(*it->second), false); + } } void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr<TPortionInfo> portionAfter) { @@ -158,7 +161,7 @@ TGranuleMeta::TGranuleMeta(const TGranuleRecord& rec, std::shared_ptr<TGranulesS , Record(rec) { Y_VERIFY(Owner); - OptimizerPlanner = std::make_shared<NStorageOptimizer::TIntervalsOptimizerPlanner>(rec.Granule); + OptimizerPlanner = std::make_shared<NStorageOptimizer::TIntervalsOptimizerPlanner>(rec.Granule, owner->GetStoragesManager()); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule.h b/ydb/core/tx/columnshard/engines/storage/granule.h index 5053a4fcef7..955c2a83dd0 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule.h @@ -273,16 +273,16 @@ public: const TPortionInfo& GetPortionVerified(const ui64 portion) const { auto it = Portions.find(portion); - Y_VERIFY(it != Portions.end()); + AFL_VERIFY(it != Portions.end())("portion_id", portion)("count", Portions.size()); return *it->second; } - const TPortionInfo* GetPortionPointer(const ui64 portion) const { + std::shared_ptr<TPortionInfo> GetPortionPtr(const ui64 portion) const { auto it = Portions.find(portion); if (it == Portions.end()) { return nullptr; } - return it->second.get(); + return it->second; } bool ErasePortion(const ui64 portion); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.darwin-x86_64.txt index 1d702939ccd..7a3a568f8c9 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.darwin-x86_64.txt @@ -14,6 +14,7 @@ target_link_libraries(engines-storage-optimizer PUBLIC libs-apache-arrow ydb-core-protos core-formats-arrow + engines-changes-abstract ) target_sources(engines-storage-optimizer PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-aarch64.txt index 8953a79b601..ef0e77c9397 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-aarch64.txt @@ -15,6 +15,7 @@ target_link_libraries(engines-storage-optimizer PUBLIC libs-apache-arrow ydb-core-protos core-formats-arrow + engines-changes-abstract ) target_sources(engines-storage-optimizer PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-x86_64.txt index 8953a79b601..ef0e77c9397 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-x86_64.txt @@ -15,6 +15,7 @@ target_link_libraries(engines-storage-optimizer PUBLIC libs-apache-arrow ydb-core-protos core-formats-arrow + engines-changes-abstract ) target_sources(engines-storage-optimizer PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.windows-x86_64.txt index 1d702939ccd..7a3a568f8c9 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.windows-x86_64.txt @@ -14,6 +14,7 @@ target_link_libraries(engines-storage-optimizer PUBLIC libs-apache-arrow ydb-core-protos core-formats-arrow + engines-changes-abstract ) target_sources(engines-storage-optimizer PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.cpp index ebbb1a57257..f5016a83a34 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.cpp @@ -1,7 +1,8 @@ #include "intervals_optimizer.h" -#include <ydb/core/tx/columnshard/engines/changes/general_compaction.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> #include <ydb/core/tx/columnshard/counters/common/owner.h> #include <ydb/core/tx/columnshard/counters/engine_logs.h> +#include <ydb/core/tx/columnshard/engines/changes/general_compaction.h> namespace NKikimr::NOlap::NStorageOptimizer { @@ -80,24 +81,32 @@ public: }; -std::shared_ptr<TColumnEngineChanges> TIntervalsOptimizerPlanner::GetSmallPortionsMergeTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule) const { +std::shared_ptr<TColumnEngineChanges> TIntervalsOptimizerPlanner::GetSmallPortionsMergeTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const THashSet<TPortionAddress>& busyPortions) const { if (SumSmall > (i64)LimitSmallBlobsMerge) { ui64 currentSum = 0; std::map<ui64, std::shared_ptr<TPortionInfo>> portions; + std::optional<TString> tierName; for (auto&& i : SmallBlobs) { for (auto&& c : i.second) { + if (busyPortions.contains(c.second->GetAddress())) { + return nullptr; + } + if (c.second->GetMeta().GetTierName() && (!tierName || *tierName < c.second->GetMeta().GetTierName())) { + tierName = c.second->GetMeta().GetTierName(); + } currentSum += c.second->RawBytesSum(); portions.emplace(c.first, c.second); } } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_with_small")("portions", portions.size())("current_sum", currentSum)("remained", SmallBlobs.size())("remained_size", SumSmall); - return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, portions); + TSaverContext saverContext(StoragesManager->GetOperator(tierName.value_or(IStoragesManager::DefaultStorageId)), StoragesManager); + return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, portions, saverContext); } return nullptr; } std::shared_ptr<TColumnEngineChanges> TIntervalsOptimizerPlanner::DoGetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const THashSet<TPortionAddress>& busyPortions) const { - if (auto result = GetSmallPortionsMergeTask(limits, granule)) { + if (auto result = GetSmallPortionsMergeTask(limits, granule, busyPortions)) { return result; } if (RangedSegments.empty()) { @@ -165,7 +174,11 @@ std::shared_ptr<TColumnEngineChanges> TIntervalsOptimizerPlanner::DoGetOptimizat return nullptr; } + std::optional<TString> tierName; for (auto&& i : features.GetSummaryPortions()) { + if (i.second->GetMeta().GetTierName() && (!tierName || *tierName < i.second->GetMeta().GetTierName())) { + tierName = i.second->GetMeta().GetTierName(); + } if (busyPortions.contains(i.second->GetAddress())) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_skip")("features", features.DebugJson().GetStringRobust()) ("count", features.GetSummaryPortions().size())("reason", "busy_portion")("portion_address", i.second->GetAddress().DebugString()); @@ -173,7 +186,9 @@ std::shared_ptr<TColumnEngineChanges> TIntervalsOptimizerPlanner::DoGetOptimizat } } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule")("features", features.DebugJson().GetStringRobust())("count", features.GetSummaryPortions().size()); - return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, features.GetSummaryPortions()); + + TSaverContext saverContext(StoragesManager->GetOperator(tierName.value_or(IStoragesManager::DefaultStorageId)), StoragesManager); + return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, features.GetSummaryPortions(), saverContext); } bool TIntervalsOptimizerPlanner::RemoveSmallPortion(const std::shared_ptr<TPortionInfo>& info) { @@ -275,8 +290,10 @@ void TIntervalsOptimizerPlanner::AddRanged(const TBorderPositions& data) { } } -TIntervalsOptimizerPlanner::TIntervalsOptimizerPlanner(const ui64 granuleId) - : TBase(granuleId) { +TIntervalsOptimizerPlanner::TIntervalsOptimizerPlanner(const ui64 granuleId, const std::shared_ptr<IStoragesManager>& storagesManager) + : TBase(granuleId) + , StoragesManager(storagesManager) +{ Counters = std::make_shared<TCounters>(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.h index ea302c33962..1977f256138 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals_optimizer.h @@ -3,6 +3,8 @@ #include <ydb/core/formats/arrow/replace_key.h> #include <ydb/library/accessor/accessor.h> #include <ydb/core/tx/columnshard/splitter/settings.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> namespace NKikimr::NOlap::NStorageOptimizer { @@ -167,6 +169,7 @@ class TIntervalsOptimizerPlanner: public IOptimizerPlanner { private: static ui64 LimitSmallBlobsMerge; static ui64 LimitSmallBlobDetect; + std::shared_ptr<IStoragesManager> StoragesManager; std::shared_ptr<TCounters> Counters; @@ -255,7 +258,7 @@ private: bool AddSmallPortion(const std::shared_ptr<TPortionInfo>& info); - std::shared_ptr<TColumnEngineChanges> GetSmallPortionsMergeTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule) const; + std::shared_ptr<TColumnEngineChanges> GetSmallPortionsMergeTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const THashSet<TPortionAddress>& busyPortions) const; protected: virtual void DoAddPortion(const std::shared_ptr<TPortionInfo>& info) override; @@ -267,7 +270,7 @@ protected: virtual TString DoDebugString() const override; public: - TIntervalsOptimizerPlanner(const ui64 granuleId); + TIntervalsOptimizerPlanner(const ui64 granuleId, const std::shared_ptr<IStoragesManager>& storagesManager); }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp index b9382c8f316..d3e82f8b210 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.cpp @@ -1,5 +1,18 @@ #include "optimizer.h" +#include <ydb/core/tx/columnshard/engines/changes/abstract/abstract.h> namespace NKikimr::NOlap::NStorageOptimizer { +std::shared_ptr<TColumnEngineChanges> IOptimizerPlanner::GetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const THashSet<TPortionAddress>& busyPortions) const { + NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("granule_id", GranuleId)); + auto result = DoGetOptimizationTask(limits, granule, busyPortions); + if (!!result) { + auto portions = result->GetTouchedPortions(); + for (auto&& i : portions) { + AFL_VERIFY(!busyPortions.contains(i))("portion_address", i.DebugString()); + } + } + return result; +} + } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.h index e834073b9fb..9638b6836fe 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/optimizer.h @@ -50,10 +50,7 @@ public: NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("granule_id", GranuleId)); DoRemovePortion(info); } - std::shared_ptr<TColumnEngineChanges> GetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const THashSet<TPortionAddress>& busyPortions) const { - NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("granule_id", GranuleId)); - return DoGetOptimizationTask(limits, granule, busyPortions); - } + std::shared_ptr<TColumnEngineChanges> GetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const THashSet<TPortionAddress>& busyPortions) const; i64 GetUsefulMetric() const { return DoGetUsefulMetric(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make b/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make index 99e60e419e2..15879284dca 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make @@ -9,6 +9,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/protos ydb/core/formats/arrow + ydb/core/tx/columnshard/engines/changes/abstract ) END() diff --git a/ydb/core/tx/columnshard/engines/storage/storage.h b/ydb/core/tx/columnshard/engines/storage/storage.h index 38feb816fea..ed78f7ce3ec 100644 --- a/ydb/core/tx/columnshard/engines/storage/storage.h +++ b/ydb/core/tx/columnshard/engines/storage/storage.h @@ -1,6 +1,8 @@ #pragma once #include "granule.h" #include <ydb/core/tx/columnshard/counters/engine_logs.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> namespace NKikimr::NOlap { @@ -10,6 +12,7 @@ private: const NColumnShard::TEngineLogsCounters Counters; THashMap<ui64, TCompactionPriority> GranulesCompactionPriority; std::map<TCompactionPriority, std::set<ui64>> GranuleCompactionPrioritySorting; + std::shared_ptr<IStoragesManager> StoragesManager; bool PackModificationFlag = false; THashMap<ui64, const TGranuleMeta*> PackModifiedGranules; void StartModificationImpl() { @@ -27,12 +30,18 @@ private: } public: - TGranulesStorage(const NColumnShard::TEngineLogsCounters counters, const TCompactionLimits& limits) + TGranulesStorage(const NColumnShard::TEngineLogsCounters counters, const TCompactionLimits& limits, const std::shared_ptr<IStoragesManager>& storagesManager) : Limits(limits) - , Counters(counters) { + , Counters(counters) + , StoragesManager(storagesManager) + { } + const std::shared_ptr<IStoragesManager>& GetStoragesManager() const { + return StoragesManager; + } + const NColumnShard::TEngineLogsCounters& GetCounters() const { return Counters; } diff --git a/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp index a4cf3738120..07d2cdfe9f4 100644 --- a/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp @@ -1,9 +1,11 @@ #include <library/cpp/testing/unittest/registar.h> #include "column_engine_logs.h" #include "predicate/predicate.h" +#include "changes/cleanup.h" #include <ydb/core/tx/columnshard/columnshard_ut_common.h> #include <ydb/core/tx/columnshard/engines/changes/compaction.h> +#include <ydb/core/tx/columnshard/blobs_action/bs/storage.h> namespace NKikimr { @@ -358,7 +360,10 @@ bool Compact(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, T bool Cleanup(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, ui32 expectedToDrop) { THashSet<ui64> pathsToDrop; std::shared_ptr<TCleanupColumnEngineChanges> changes = engine.StartCleanup(snap, pathsToDrop, 1000); - UNIT_ASSERT(changes); + UNIT_ASSERT(changes || !expectedToDrop); + if (!expectedToDrop && !changes) { + return true; + } UNIT_ASSERT_VALUES_EQUAL(changes->PortionsToDrop.size(), expectedToDrop); @@ -372,7 +377,7 @@ bool Ttl(TColumnEngineForLogs& engine, TTestDbWrapper& db, const THashMap<ui64, NOlap::TTiering>& pathEviction, ui32 expectedToDrop) { std::shared_ptr<TTTLColumnEngineChanges> changes = engine.StartTtl(pathEviction, {}); UNIT_ASSERT(changes); - UNIT_ASSERT_VALUES_EQUAL(changes->PortionsToDrop.size(), expectedToDrop); + UNIT_ASSERT_VALUES_EQUAL(changes->PortionsToRemove.size(), expectedToDrop); changes->StartEmergency(); @@ -399,6 +404,20 @@ std::shared_ptr<TPredicate> MakeStrPredicate(const std::string& key, NArrow::EOp } // namespace +class TTestStoragesManager: public NOlap::IStoragesManager { +private: + using TBase = NOlap::IStoragesManager; + TIntrusivePtr<TTabletStorageInfo> TabletInfo = new TTabletStorageInfo(); +protected: + virtual std::shared_ptr<NOlap::IBlobsStorageOperator> DoBuildOperator(const TString& storageId) override { + if (storageId == TBase::DefaultStorageId) { + return std::make_shared<NOlap::NBlobOperations::NBlobStorage::TOperator>(storageId, NActors::TActorId(), TabletInfo, 1); + } else + return nullptr; + } +}; + +std::shared_ptr<NKikimr::NOlap::IStoragesManager> CommonStoragesManager = std::make_shared<TTestStoragesManager>(); Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void WriteLoadRead(const std::vector<std::pair<TString, TTypeInfo>>& ydbSchema, @@ -416,7 +435,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] // load - TColumnEngineForLogs engine(0); + TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); TSnapshot indexSnaphot(1, 1); engine.UpdateDefaultSchema(indexSnaphot, TIndexInfo(tableInfo)); THashSet<TUnifiedBlobId> lostBlobs; @@ -497,7 +516,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); TSnapshot indexSnapshot(1, 1); - TColumnEngineForLogs engine(0, TestLimits()); + TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); engine.UpdateDefaultSchema(indexSnapshot, TIndexInfo(tableInfo)); THashSet<TUnifiedBlobId> lostBlobs; engine.Load(db, lostBlobs); @@ -598,7 +617,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // inserts ui64 planStep = 1; - TColumnEngineForLogs engine(0, TestLimits()); + TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); TSnapshot indexSnapshot(1, 1); engine.UpdateDefaultSchema(indexSnapshot, TIndexInfo(tableInfo)); THashSet<TUnifiedBlobId> lostBlobs; @@ -622,7 +641,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } { // check it's overloaded after reload - TColumnEngineForLogs tmpEngine(0, TestLimits()); + TColumnEngineForLogs tmpEngine(0, TestLimits(), CommonStoragesManager); tmpEngine.UpdateDefaultSchema(TSnapshot::Zero(), TIndexInfo(tableInfo)); tmpEngine.Load(db, lostBlobs); } @@ -651,7 +670,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } { // check it's not overloaded after reload - TColumnEngineForLogs tmpEngine(0, TestLimits()); + TColumnEngineForLogs tmpEngine(0, TestLimits(), CommonStoragesManager); tmpEngine.UpdateDefaultSchema(TSnapshot::Zero(), TIndexInfo(tableInfo)); tmpEngine.Load(db, lostBlobs); } @@ -668,7 +687,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { ui64 planStep = 1; TSnapshot indexSnapshot(1, 1); - TColumnEngineForLogs engine(0, TestLimits()); + TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); engine.UpdateDefaultSchema(indexSnapshot, TIndexInfo(tableInfo)); THashSet<TUnifiedBlobId> lostBlobs; engine.Load(db, lostBlobs); diff --git a/ydb/core/tx/columnshard/engines/writer/blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/blob_constructor.h index cadaf2ca2d8..73157e33bfa 100644 --- a/ydb/core/tx/columnshard/engines/writer/blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/blob_constructor.h @@ -1,7 +1,7 @@ #pragma once #include <ydb/core/protos/base.pb.h> -#include <ydb/core/tx/columnshard/blobs_action/abstract.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> #include <ydb/library/accessor/accessor.h> #include <library/cpp/actors/core/event.h> @@ -19,22 +19,20 @@ struct TUsage; namespace NOlap { -class TUnifiedBlobId; - class TBlobWriteInfo { private: YDB_READONLY_DEF(TUnifiedBlobId, BlobId); YDB_READONLY_DEF(TString, Data); - YDB_READONLY_DEF(std::shared_ptr<IBlobsAction>, WriteOperator); + YDB_READONLY_DEF(std::shared_ptr<IBlobsWritingAction>, WriteOperator); - TBlobWriteInfo(const TString& data, const std::shared_ptr<IBlobsAction>& writeOperator) + TBlobWriteInfo(const TString& data, const std::shared_ptr<IBlobsWritingAction>& writeOperator) : Data(data) , WriteOperator(writeOperator) { Y_VERIFY(WriteOperator); - BlobId = WriteOperator->AllocateNextBlobId(data); + BlobId = WriteOperator->AddDataForWrite(data); } public: - static TBlobWriteInfo BuildWriteTask(const TString& data, const std::shared_ptr<IBlobsAction>& writeOperator) { + static TBlobWriteInfo BuildWriteTask(const TString& data, const std::shared_ptr<IBlobsWritingAction>& writeOperator) { return TBlobWriteInfo(data, writeOperator); } }; diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp index 974909dd4c2..6af7650f125 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp @@ -2,39 +2,39 @@ #include <ydb/core/tx/columnshard/defs.h> #include <ydb/core/tx/columnshard/blob.h> - +#include <ydb/core/tx/columnshard/engines/changes/abstract/abstract.h> namespace NKikimr::NOlap { -std::optional<TBlobWriteInfo> TCompactedWriteController::Next() { +TCompactedWriteController::TCompactedWriteController(const TActorId& dstActor, TAutoPtr<NColumnShard::TEvPrivate::TEvWriteIndex> writeEv) + : WriteIndexEv(writeEv) + , DstActor(dstActor) +{ auto& changes = *WriteIndexEv->IndexChanges; - while (CurrentPortion < changes.GetWritePortionsCount()) { - auto* pInfo = changes.GetWritePortionInfo(CurrentPortion); + for (ui32 i = 0; i < changes.GetWritePortionsCount(); ++i) { + if (!changes.NeedWritePortion(i)) { + continue; + } + auto* pInfo = changes.GetWritePortionInfo(i); Y_VERIFY(pInfo); TPortionInfoWithBlobs& portionWithBlobs = *pInfo; - if (CurrentBlobIndex < portionWithBlobs.GetBlobs().size() && changes.NeedWritePortion(CurrentPortion)) { - CurrentBlobInfo = &portionWithBlobs.GetBlobs()[CurrentBlobIndex]; - ++CurrentBlobIndex; - auto result = TBlobWriteInfo::BuildWriteTask(CurrentBlobInfo->GetBlob(), WriteIndexEv->BlobsAction); - CurrentBlobInfo->RegisterBlobId(portionWithBlobs, result.GetBlobId()); - return result; - } else { - ++CurrentPortion; - CurrentBlobIndex = 0; + auto action = changes.GetBlobsAction().GetWriting(portionWithBlobs.GetPortionInfo()); + for (auto&& b : portionWithBlobs.GetBlobs()) { + auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetBlob(), action)); + b.RegisterBlobId(portionWithBlobs, task.GetBlobId()); } } - return {}; } -TCompactedWriteController::TCompactedWriteController(const TActorId& dstActor, TAutoPtr<NColumnShard::TEvPrivate::TEvWriteIndex> writeEv, bool /*blobGrouppingEnabled*/) - : WriteIndexEv(writeEv) - , Action(WriteIndexEv->BlobsAction) - , DstActor(dstActor) -{} - void TCompactedWriteController::DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) { WriteIndexEv->PutResult = putResult; ctx.Send(DstActor, WriteIndexEv.Release()); } +TCompactedWriteController::~TCompactedWriteController() { + if (WriteIndexEv && WriteIndexEv->IndexChanges) { + WriteIndexEv->IndexChanges->AbortEmergency(); + } +} + } diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h index bfe1d986a31..36a5d9a34b4 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h @@ -5,40 +5,19 @@ #include <ydb/core/tx/columnshard/columnshard.h> #include <ydb/core/tx/columnshard/columnshard_private_events.h> +#include <ydb/core/tx/columnshard/engines/portions/with_blobs.h> namespace NKikimr::NOlap { class TCompactedWriteController : public NColumnShard::IWriteController { private: - ui64 CurrentPortion = 0; - ui64 CurrentBlobIndex = 0; - TPortionInfoWithBlobs::TBlobInfo* CurrentBlobInfo = nullptr; - TAutoPtr<NColumnShard::TEvPrivate::TEvWriteIndex> WriteIndexEv; - std::shared_ptr<IBlobsAction> Action; TActorId DstActor; protected: void DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) override; - virtual bool IsBlobActionsReady() const override { - return Action->IsReady(); - } public: - TCompactedWriteController(const TActorId& dstActor, TAutoPtr<NColumnShard::TEvPrivate::TEvWriteIndex> writeEv, bool blobGrouppingEnabled); - ~TCompactedWriteController() { - if (WriteIndexEv && WriteIndexEv->IndexChanges) { - WriteIndexEv->IndexChanges->AbortEmergency(); - } - } - - virtual std::vector<std::shared_ptr<IBlobsAction>> GetBlobActions() const override { - return {Action}; - } - - virtual std::optional<TBlobWriteInfo> Next() override; - - virtual void OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result) override { - Action->OnBlobWriteResult(result.Id, result.Status); - } + TCompactedWriteController(const TActorId& dstActor, TAutoPtr<NColumnShard::TEvPrivate::TEvWriteIndex> writeEv); + ~TCompactedWriteController(); }; } diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp index c27df8fd4e6..e4b8b122ae2 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp @@ -6,29 +6,22 @@ namespace NKikimr::NOlap { -std::optional<TBlobWriteInfo> TIndexedWriteController::Next() { - if (CurrentIndex == BlobsSplitted.size()) { - return {}; - } - CurrentIndex++; - auto& bInfo = BlobsSplitted[CurrentIndex - 1]; - auto result = TBlobWriteInfo::BuildWriteTask(bInfo.GetData(), Action); - BlobData.emplace_back(TBlobRange(result.GetBlobId(), 0, result.GetBlobId().BlobSize()), bInfo.GetSpecialKeys(), bInfo.GetRowsCount(), bInfo.GetRawBytes(), AppData()->TimeProvider->Now()); - return result; -} - -TIndexedWriteController::TIndexedWriteController(const TActorId& dstActor, const NEvWrite::TWriteData& writeData, const std::shared_ptr<IBlobsAction>& action, std::vector<NArrow::TSerializedBatch>&& blobsSplitted) +TIndexedWriteController::TIndexedWriteController(const TActorId& dstActor, const NEvWrite::TWriteData& writeData, const std::shared_ptr<IBlobsWritingAction>& action, std::vector<NArrow::TSerializedBatch>&& blobsSplitted) : BlobsSplitted(std::move(blobsSplitted)) , WriteData(writeData) , DstActor(dstActor) , Action(action) { + for (auto&& bInfo : BlobsSplitted) { + auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(bInfo.GetData(), Action)); + BlobData.emplace_back(TBlobRange::FromBlobId(task.GetBlobId()), bInfo.GetSpecialKeys(), bInfo.GetRowsCount(), bInfo.GetRawBytes(), AppData()->TimeProvider->Now()); + } ResourceUsage.SourceMemorySize = WriteData.GetSize(); } void TIndexedWriteController::DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) { if (putResult->GetPutStatus() == NKikimrProto::OK) { - std::vector<std::shared_ptr<IBlobsAction>> actions = {Action}; + std::vector<std::shared_ptr<IBlobsWritingAction>> actions = {Action}; auto result = std::make_unique<NColumnShard::TEvPrivate::TEvWriteBlobsResult>(putResult, std::move(BlobData), actions, WriteData.GetWriteMeta(), WriteData.GetData().GetSchemaVersion()); ctx.Send(DstActor, result.release()); } else { diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index e2be02ece5b..c9bac60f4b4 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -4,7 +4,7 @@ #include "write_controller.h" #include <ydb/core/tx/ev_write/write_data.h> -#include <ydb/core/tx/columnshard/blobs_action/abstract.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> #include <ydb/core/tx/columnshard/engines/portion_info.h> #include <ydb/core/tx/columnshard/columnshard.h> #include <ydb/core/tx/columnshard/columnshard_private_events.h> @@ -15,29 +15,15 @@ namespace NKikimr::NOlap { class TIndexedWriteController : public NColumnShard::IWriteController { private: - virtual bool IsBlobActionsReady() const override { - return Action->IsReady(); - } - - ui64 CurrentIndex = 0; std::vector<NArrow::TSerializedBatch> BlobsSplitted; NEvWrite::TWriteData WriteData; TVector<NColumnShard::TEvPrivate::TEvWriteBlobsResult::TPutBlobData> BlobData; TActorId DstActor; - std::shared_ptr<IBlobsAction> Action; + std::shared_ptr<IBlobsWritingAction> Action; void DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) override; public: - virtual std::vector<std::shared_ptr<IBlobsAction>> GetBlobActions() const override { - return {Action}; - } - - virtual void OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result) override { - Action->OnBlobWriteResult(result.Id, result.Status); - } - - TIndexedWriteController(const TActorId& dstActor, const NEvWrite::TWriteData& writeData, const std::shared_ptr<IBlobsAction>& action, std::vector<NArrow::TSerializedBatch>&& blobsSplitted); + TIndexedWriteController(const TActorId& dstActor, const NEvWrite::TWriteData& writeData, const std::shared_ptr<IBlobsWritingAction>& action, std::vector<NArrow::TSerializedBatch>&& blobsSplitted); - virtual std::optional<TBlobWriteInfo> Next() override; }; } diff --git a/ydb/core/tx/columnshard/engines/writer/write_controller.h b/ydb/core/tx/columnshard/engines/writer/write_controller.h index ff7730600a9..38c7dda0f3e 100644 --- a/ydb/core/tx/columnshard/engines/writer/write_controller.h +++ b/ydb/core/tx/columnshard/engines/writer/write_controller.h @@ -6,7 +6,7 @@ #include <library/cpp/actors/core/actor.h> #include <ydb/core/tx/columnshard/blob_manager.h> #include <ydb/core/tx/columnshard/defs.h> -#include <ydb/core/tx/columnshard/blobs_action/abstract.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> namespace NKikimr::NColumnShard { @@ -41,7 +41,29 @@ private: }; class IWriteController { +private: + THashMap<TUnifiedBlobId, std::shared_ptr<NOlap::IBlobsWritingAction>> BlobActions; + THashMap<i64, std::shared_ptr<NOlap::IBlobsWritingAction>> WritingActions; + std::deque<NOlap::TBlobWriteInfo> WriteTasks; +protected: + TUsage ResourceUsage; + virtual void DoOnReadyResult(const NActors::TActorContext& ctx, const TBlobPutResult::TPtr& putResult) = 0; + virtual void DoOnBlobWriteResult(const TEvBlobStorage::TEvPutResult& /*result*/) { + + } + + NOlap::TBlobWriteInfo& AddWriteTask(NOlap::TBlobWriteInfo&& task) { + WritingActions.emplace(task.GetWriteOperator()->GetActionId(), task.GetWriteOperator()); + WriteTasks.emplace_back(std::move(task)); + return WriteTasks.back(); + } public: + void Abort() { + for (auto&& i : WritingActions) { + i.second->Abort(); + } + } + using TPtr = std::shared_ptr<IWriteController>; virtual ~IWriteController() {} @@ -50,14 +72,35 @@ public: DoOnReadyResult(ctx, putResult); } - virtual void OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result) = 0; - virtual std::optional<NOlap::TBlobWriteInfo> Next() = 0; - virtual bool IsBlobActionsReady() const = 0; - virtual std::vector<std::shared_ptr<NOlap::IBlobsAction>> GetBlobActions() const = 0; -private: - virtual void DoOnReadyResult(const NActors::TActorContext& ctx, const TBlobPutResult::TPtr& putResult) = 0; -protected: - TUsage ResourceUsage; + void OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result) { + TUnifiedBlobId blobId(result.GroupId, result.Id); + auto it = BlobActions.find(blobId); + AFL_VERIFY(it != BlobActions.end()); + it->second->OnBlobWriteResult(blobId, result.Status); + BlobActions.erase(it); + DoOnBlobWriteResult(result); + } + + std::optional<NOlap::TBlobWriteInfo> Next() { + if (WriteTasks.empty()) { + return {}; + } + auto result = std::move(WriteTasks.front()); + WriteTasks.pop_front(); + BlobActions.emplace(result.GetBlobId(), result.GetWriteOperator()); + return result; + + } + bool IsBlobActionsReady() const { + return BlobActions.empty(); + } + std::vector<std::shared_ptr<NOlap::IBlobsWritingAction>> GetBlobActions() const { + std::vector<std::shared_ptr<NOlap::IBlobsWritingAction>> actions; + for (auto&& i : WritingActions) { + actions.emplace_back(i.second); + } + return actions; + } }; } diff --git a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.darwin-x86_64.txt index 37005df7b95..83c3aaa02b8 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.darwin-x86_64.txt @@ -11,6 +11,7 @@ add_library(columnshard-hooks-abstract) target_link_libraries(columnshard-hooks-abstract PUBLIC contrib-libs-cxxsupp yutil + core-tx-tiering ) target_sources(columnshard-hooks-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp diff --git a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-aarch64.txt index e2f9700cade..1b0fdf350ed 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-aarch64.txt @@ -12,6 +12,7 @@ target_link_libraries(columnshard-hooks-abstract PUBLIC contrib-libs-linux-headers contrib-libs-cxxsupp yutil + core-tx-tiering ) target_sources(columnshard-hooks-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp diff --git a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-x86_64.txt index e2f9700cade..1b0fdf350ed 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.linux-x86_64.txt @@ -12,6 +12,7 @@ target_link_libraries(columnshard-hooks-abstract PUBLIC contrib-libs-linux-headers contrib-libs-cxxsupp yutil + core-tx-tiering ) target_sources(columnshard-hooks-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp diff --git a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.windows-x86_64.txt index 37005df7b95..83c3aaa02b8 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/hooks/abstract/CMakeLists.windows-x86_64.txt @@ -11,6 +11,7 @@ add_library(columnshard-hooks-abstract) target_link_libraries(columnshard-hooks-abstract PUBLIC contrib-libs-cxxsupp yutil + core-tx-tiering ) target_sources(columnshard-hooks-abstract PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.h b/ydb/core/tx/columnshard/hooks/abstract/abstract.h index c812fbdd5d3..28070c6b002 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.h +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.h @@ -1,12 +1,21 @@ #pragma once +#include <ydb/services/metadata/abstract/fetcher.h> +#include <ydb/core/tx/tiering/snapshot.h> + #include <ydb/library/accessor/accessor.h> #include <util/generic/singleton.h> #include <util/generic/refcount.h> +#include <util/datetime/base.h> #include <memory> namespace NKikimr::NOlap::NIndexedReader { class IOrderPolicy; } + +namespace NKikimr::NColumnShard { +class TTiersManager; +} + namespace NKikimr::NOlap { class TColumnEngineChanges; } @@ -29,6 +38,13 @@ protected: virtual bool DoOnStartCompaction(std::shared_ptr<NOlap::TColumnEngineChanges>& /*changes*/) { return true; } + virtual bool DoOnWriteIndexComplete(const ui64 /*tabletId*/, const TString& /*changeClassName*/) { + return true; + } + virtual bool DoOnWriteIndexStart(const ui64 /*tabletId*/, const TString& /*changeClassName*/) { + return true; + } + public: using TPtr = std::shared_ptr<ICSController>; virtual ~ICSController() = default; @@ -39,9 +55,25 @@ public: bool OnAfterFilterAssembling(const std::shared_ptr<arrow::RecordBatch>& batch) { return DoOnAfterFilterAssembling(batch); } + bool OnWriteIndexComplete(const ui64 tabletId, const TString& changeClassName) { + return DoOnWriteIndexComplete(tabletId, changeClassName); + } + bool OnWriteIndexStart(const ui64 tabletId, const TString& changeClassName) { + return DoOnWriteIndexStart(tabletId, changeClassName); + } bool OnStartCompaction(std::shared_ptr<NOlap::TColumnEngineChanges>& changes) { return DoOnStartCompaction(changes); } + virtual TDuration GetTTLDefaultWaitingDuration(const TDuration defaultValue) const { + return defaultValue; + } + virtual void OnTieringModified(const std::shared_ptr<NColumnShard::TTiersManager>& /*tiers*/) { + } + + virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const { + static std::shared_ptr<NColumnShard::NTiers::TConfigsSnapshot> result = std::make_shared<NColumnShard::NTiers::TConfigsSnapshot>(TInstant::Now()); + return result; + } }; class TControllers { diff --git a/ydb/core/tx/columnshard/hooks/abstract/ya.make b/ydb/core/tx/columnshard/hooks/abstract/ya.make index 954db2fb2dc..1fc805cb1b4 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/ya.make +++ b/ydb/core/tx/columnshard/hooks/abstract/ya.make @@ -5,6 +5,7 @@ SRCS( ) PEERDIR( + ydb/core/tx/tiering ) END() diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.h b/ydb/core/tx/columnshard/inflight_request_tracker.h index eca4f203d55..eeed48addaa 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.h +++ b/ydb/core/tx/columnshard/inflight_request_tracker.h @@ -11,29 +11,26 @@ using NOlap::IBlobInUseTracker; class TInFlightReadsTracker { public: // Returns a unique cookie associated with this request - ui64 AddInFlightRequest(NOlap::TReadMetadataBase::TConstPtr readMeta, IBlobInUseTracker& blobTracker) { + ui64 AddInFlightRequest(NOlap::TReadMetadataBase::TConstPtr readMeta) { const ui64 cookie = NextCookie++; - AddToInFlightRequest(cookie, readMeta, blobTracker); + AddToInFlightRequest(cookie, readMeta); return cookie; } // Returns a unique cookie associated with this request template <class TReadMetadataList> - ui64 AddInFlightRequest(const TReadMetadataList& readMetaList, IBlobInUseTracker& blobTracker) { + ui64 AddInFlightRequest(const TReadMetadataList& readMetaList) { const ui64 cookie = NextCookie++; for (const auto& readMetaPtr : readMetaList) { - AddToInFlightRequest(cookie, readMetaPtr, blobTracker); + AddToInFlightRequest(cookie, readMetaPtr); } return cookie; } - // Forget completed request - THashSet<NOlap::TUnifiedBlobId> RemoveInFlightRequest(ui64 cookie, IBlobInUseTracker& blobTracker) { + void RemoveInFlightRequest(ui64 cookie) { Y_VERIFY(RequestsMeta.contains(cookie), "Unknown request cookie %" PRIu64, cookie); const auto& readMetaList = RequestsMeta[cookie]; - THashSet<NOlap::TUnifiedBlobId> freedBlobs; - for (const auto& readMetaBase : readMetaList) { NOlap::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast<const NOlap::TReadMetadata>(readMetaBase); @@ -50,22 +47,20 @@ public: } else { it->second--; } + auto tracker = portion->GetBlobsStorage()->GetBlobsTracker(); for (auto& rec : portion->Records) { - if (blobTracker.SetBlobInUse(rec.BlobRange.BlobId, false)) { - freedBlobs.emplace(rec.BlobRange.BlobId); - } + tracker->FreeBlob(rec.BlobRange.BlobId); } } + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); for (const auto& committedBlob : readMeta->CommittedBlobs) { - if (blobTracker.SetBlobInUse(committedBlob.GetBlobRange().GetBlobId(), false)) { - freedBlobs.emplace(committedBlob.GetBlobRange().GetBlobId()); - } + tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); } } RequestsMeta.erase(cookie); - return freedBlobs; } // Checks if the portion is in use by any in-flight request @@ -79,8 +74,14 @@ public: return delta; } + TInFlightReadsTracker(const std::shared_ptr<NOlap::IStoragesManager>& storagesManager) + : StoragesManager(storagesManager) + { + + } + private: - void AddToInFlightRequest(const ui64 cookie, NOlap::TReadMetadataBase::TConstPtr readMetaBase, IBlobInUseTracker& blobTracker) { + void AddToInFlightRequest(const ui64 cookie, NOlap::TReadMetadataBase::TConstPtr readMetaBase) { RequestsMeta[cookie].push_back(readMetaBase); NOlap::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast<const NOlap::TReadMetadata>(readMetaBase); @@ -96,17 +97,21 @@ private: for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { const ui64 portionId = portion->GetPortion(); PortionUseCount[portionId]++; + auto tracker = portion->GetBlobsStorage()->GetBlobsTracker(); for (auto& rec : portion->Records) { - blobTracker.SetBlobInUse(rec.BlobRange.BlobId, true); + tracker->UseBlob(rec.BlobRange.BlobId); } } + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); for (const auto& committedBlob : readMeta->CommittedBlobs) { - blobTracker.SetBlobInUse(committedBlob.GetBlobRange().GetBlobId(), true); + tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); } } private: + std::shared_ptr<NOlap::IStoragesManager> StoragesManager; ui64 NextCookie{1}; THashMap<ui64, TList<NOlap::TReadMetadataBase::TConstPtr>> RequestsMeta; THashMap<ui64, ui64> PortionUseCount; diff --git a/ydb/core/tx/columnshard/operations/slice_builder.h b/ydb/core/tx/columnshard/operations/slice_builder.h index fc2eea4ea34..3e347e874b4 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder.h +++ b/ydb/core/tx/columnshard/operations/slice_builder.h @@ -1,14 +1,14 @@ #pragma once #include <ydb/core/tx/conveyor/usage/abstract.h> #include <ydb/core/formats/arrow/size_calcer.h> -#include <ydb/core/tx/columnshard/blobs_action/abstract.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/write.h> #include <ydb/core/tx/ev_write/write_data.h> namespace NKikimr::NOlap { class TBuildSlicesTask: public NConveyor::ITask { private: - std::shared_ptr<IBlobsAction> Action; + std::shared_ptr<IBlobsWritingAction> Action; NEvWrite::TWriteData WriteData; const ui64 TabletId; const NActors::TActorId ParentActorId; @@ -21,7 +21,7 @@ public: return "Write::ConstructBlobs::Slices"; } - TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const std::shared_ptr<IBlobsAction>& action, const NEvWrite::TWriteData& writeData) + TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const std::shared_ptr<IBlobsWritingAction>& action, const NEvWrite::TWriteData& writeData) : Action(action) , WriteData(writeData) , TabletId(tabletId) diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index 57898279c9b..cc248a53fa7 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -3,8 +3,8 @@ #include <ydb/core/tx/columnshard/columnshard_schema.h> #include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h> -#include <ydb/core/tx/columnshard/blobs_action/bs.h> #include <ydb/core/tx/columnshard/columnshard_impl.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> #include <ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h> #include <ydb/core/tx/conveyor/usage/service.h> @@ -25,7 +25,8 @@ namespace NKikimr::NColumnShard { Y_VERIFY(Status == EOperationStatus::Draft); NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source); - std::shared_ptr<NConveyor::ITask> task = std::make_shared<NOlap::TBuildSlicesTask>(owner.TabletID(), ctx.SelfID, std::make_shared<NOlap::TBSWriteAction>(*owner.BlobManager), NEvWrite::TWriteData(writeMeta, data)); + std::shared_ptr<NConveyor::ITask> task = std::make_shared<NOlap::TBuildSlicesTask>(owner.TabletID(), ctx.SelfID, + owner.StoragesManager->GetInsertOperator()->StartWritingAction(), NEvWrite::TWriteData(writeMeta, data)); NConveyor::TCompServiceOperator::SendTaskToExecute(task); Status = EOperationStatus::Started; @@ -81,13 +82,6 @@ namespace NKikimr::NColumnShard { THashSet<TWriteId> writeIds; writeIds.insert(GlobalWriteIds.begin(), GlobalWriteIds.end()); owner.InsertTable->Abort(dbTable, writeIds); - - TBlobManagerDb blobManagerDb(txc.DB); - auto allAborted = owner.InsertTable->GetAborted(); - for (auto& [abortedWriteId, abortedData] : allAborted) { - owner.InsertTable->EraseAborted(dbTable, abortedData); - owner.BlobManager->DeleteBlob(abortedData.GetBlobRange().GetBlobId(), blobManagerDb); - } } bool TOperationsManager::Init(NTabletFlatExecutor::TTransactionContext& txc) { diff --git a/ydb/core/tx/columnshard/read_actor.cpp b/ydb/core/tx/columnshard/read_actor.cpp index c708a261edf..1a14f669bf0 100644 --- a/ydb/core/tx/columnshard/read_actor.cpp +++ b/ydb/core/tx/columnshard/read_actor.cpp @@ -1,5 +1,8 @@ #include <ydb/core/tx/columnshard/columnshard_impl.h> #include <ydb/core/tx/columnshard/blob_cache.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h> +#include <ydb/core/tx/columnshard/blobs_reader/events.h> +#include <ydb/core/tx/conveyor/usage/events.h> #include <library/cpp/actors/core/actor_bootstrapped.h> namespace NKikimr::NColumnShard { @@ -27,7 +30,8 @@ public: return NKikimrServices::TActivity::TX_COLUMNSHARD_READ_ACTOR; } - TReadActor(ui64 tabletId, + TReadActor(ui64 tabletId, const NActors::TActorId readBlobsActor, + const std::shared_ptr<NOlap::IStoragesManager>& storages, const TActorId& dstActor, std::unique_ptr<TEvColumnShard::TEvReadResult>&& event, NOlap::TReadMetadata::TConstPtr readMetadata, @@ -35,6 +39,8 @@ public: const TActorId& columnShardActorId, ui64 requestCookie, const TConcreteScanCounters& counters) : TabletId(tabletId) + , ReadBlobsActor(readBlobsActor) + , Storages(storages) , DstActor(dstActor) , BlobCacheActorId(NBlobCache::MakeBlobCacheServiceId()) , Result(std::move(event)) @@ -46,26 +52,23 @@ public: , Counters(counters) {} - void Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev, const TActorContext& ctx) { - LOG_S_TRACE("TEvReadBlobRangeResult at tablet " << TabletId << " (read)"); - - auto& event = *ev->Get(); - const TUnifiedBlobId& blobId = event.BlobRange.BlobId; - - if (event.Status != NKikimrProto::EReplyStatus::OK) { - LOG_S_ERROR("TEvReadBlobRangeResult cannot get blob " << blobId - << " status " << NKikimrProto::EReplyStatus_Name(event.Status) - << " at tablet " << TabletId << " (read)"); + void Handle(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->GetErrorMessage()) { + ACFL_DEBUG("event", "TEvTaskProcessedResult")("error", ev->Get()->GetErrorMessage()); SendErrorResult(ctx, NKikimrTxColumnShard::EResultStatus::ERROR); return DieFinished(ctx); + } else { + ACFL_DEBUG("event", "TEvTaskProcessedResult"); + auto t = static_pointer_cast<IDataTasksProcessor::ITask>(ev->Get()->GetResult()); + Y_VERIFY_DEBUG(dynamic_pointer_cast<IDataTasksProcessor::ITask>(ev->Get()->GetResult())); + if (!IndexedData->IsFinished()) { + Y_VERIFY(t->Apply(*IndexedData)); + } + BuildResult(ctx); + if (IndexedData->IsFinished()) { + DieFinished(ctx); + } } - - Y_VERIFY(event.Data.size() == event.BlobRange.Size, "%zu, %d", event.Data.size(), event.BlobRange.Size); - - IndexedData->AddData(event.BlobRange, event.Data); - - BuildResult(ctx); - DieFinished(ctx); } void Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) { @@ -143,7 +146,7 @@ public: } void Bootstrap(const TActorContext& ctx) { - IndexedData = ReadMetadata->BuildReader(NOlap::TReadContext(Counters, true), ReadMetadata); + IndexedData = ReadMetadata->BuildReader(NOlap::TReadContext(Storages, Counters, true), ReadMetadata); LOG_S_DEBUG("Starting read (" << IndexedData->DebugString() << ") at tablet " << TabletId); bool earlyExit = false; @@ -161,9 +164,8 @@ public: SendTimeouts(ctx); ctx.Send(SelfId(), new TEvents::TEvPoisonPill()); } else { - while (const auto blobRange = IndexedData->ExtractNextBlob(false)) { - Y_VERIFY(blobRange->BlobId.IsValid()); - SendReadRequest(ctx, *blobRange); + while (auto task = IndexedData->ExtractNextReadTask(false)) { + Send(ReadBlobsActor, std::make_unique<NOlap::NBlobOperations::NRead::TEvStartReadTask>(task)); } BuildResult(ctx); } @@ -175,24 +177,9 @@ public: SendErrorResult(ctx, NKikimrTxColumnShard::EResultStatus::TIMEOUT); } - void SendReadRequest(const TActorContext& ctx, const NBlobCache::TBlobRange& blobRange) { - Y_UNUSED(ctx); - Y_VERIFY(blobRange.Size); - - auto& externBlobs = ReadMetadata->ExternBlobs; - bool fallback = externBlobs && externBlobs->contains(blobRange.BlobId); - - NBlobCache::TReadBlobRangeOptions readOpts { - .CacheAfterRead = true, - .ForceFallback = fallback, - .IsBackgroud = false - }; - Send(BlobCacheActorId, new NBlobCache::TEvBlobCache::TEvReadBlobRange(blobRange, std::move(readOpts))); - } - STFUNC(StateWait) { switch (ev->GetTypeRewrite()) { - HFunc(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult, Handle); + HFunc(NConveyor::TEvExecution::TEvTaskProcessedResult, Handle); HFunc(TEvents::TEvWakeup, Handle); default: break; @@ -201,6 +188,8 @@ public: private: ui64 TabletId; + TActorId ReadBlobsActor; + std::shared_ptr<NOlap::IStoragesManager> Storages; TActorId DstActor; TActorId BlobCacheActorId; std::unique_ptr<TEvColumnShard::TEvReadResult> Result; @@ -233,14 +222,14 @@ private: } // namespace IActor* CreateReadActor(ui64 tabletId, - const TActorId& dstActor, + const NActors::TActorId readBlobsActor, const TActorId& dstActor, const std::shared_ptr<NOlap::IStoragesManager>& storages, std::unique_ptr<TEvColumnShard::TEvReadResult>&& event, NOlap::TReadMetadata::TConstPtr readMetadata, const TInstant& deadline, const TActorId& columnShardActorId, ui64 requestCookie, const TConcreteScanCounters& counters) { - return new TReadActor(tabletId, dstActor, std::move(event), readMetadata, + return new TReadActor(tabletId, readBlobsActor, storages, dstActor, std::move(event), readMetadata, deadline, columnShardActorId, requestCookie, counters); } diff --git a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.darwin-x86_64.txt index fc244eb2d1b..9a37386f998 100644 --- a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.darwin-x86_64.txt @@ -25,6 +25,17 @@ target_link_libraries(ydb-core-tx-columnshard-splitter-ut PUBLIC libs-apache-arrow ydb-library-arrow_kernels tx-columnshard-counters + formats-arrow-compression + core-kqp-common + yql-parser-pg_wrapper + yql-public-udf + ydb-core-persqueue + core-kqp-session_actor + core-tx-tx_proxy + ydb-core-tx + ydb-core-mind + udf-service-exception_policy + yql-sql-pg ) target_link_options(ydb-core-tx-columnshard-splitter-ut PRIVATE -Wl,-platform_version,macos,11.0,11.0 diff --git a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-aarch64.txt b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-aarch64.txt index a581de47ab4..4004fb00aca 100644 --- a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-aarch64.txt @@ -25,6 +25,17 @@ target_link_libraries(ydb-core-tx-columnshard-splitter-ut PUBLIC libs-apache-arrow ydb-library-arrow_kernels tx-columnshard-counters + formats-arrow-compression + core-kqp-common + yql-parser-pg_wrapper + yql-public-udf + ydb-core-persqueue + core-kqp-session_actor + core-tx-tx_proxy + ydb-core-tx + ydb-core-mind + udf-service-exception_policy + yql-sql-pg ) target_link_options(ydb-core-tx-columnshard-splitter-ut PRIVATE -ldl diff --git a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-x86_64.txt b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-x86_64.txt index 58ce52ba16a..78af7fd737f 100644 --- a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.linux-x86_64.txt @@ -26,6 +26,17 @@ target_link_libraries(ydb-core-tx-columnshard-splitter-ut PUBLIC libs-apache-arrow ydb-library-arrow_kernels tx-columnshard-counters + formats-arrow-compression + core-kqp-common + yql-parser-pg_wrapper + yql-public-udf + ydb-core-persqueue + core-kqp-session_actor + core-tx-tx_proxy + ydb-core-tx + ydb-core-mind + udf-service-exception_policy + yql-sql-pg ) target_link_options(ydb-core-tx-columnshard-splitter-ut PRIVATE -ldl diff --git a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.windows-x86_64.txt b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.windows-x86_64.txt index a7d6bf2ea79..344442f4e89 100644 --- a/ydb/core/tx/columnshard/splitter/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/columnshard/splitter/ut/CMakeLists.windows-x86_64.txt @@ -25,6 +25,17 @@ target_link_libraries(ydb-core-tx-columnshard-splitter-ut PUBLIC libs-apache-arrow ydb-library-arrow_kernels tx-columnshard-counters + formats-arrow-compression + core-kqp-common + yql-parser-pg_wrapper + yql-public-udf + ydb-core-persqueue + core-kqp-session_actor + core-tx-tx_proxy + ydb-core-tx + ydb-core-mind + udf-service-exception_policy + yql-sql-pg ) target_sources(ydb-core-tx-columnshard-splitter-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp diff --git a/ydb/core/tx/columnshard/splitter/ut/ya.make b/ydb/core/tx/columnshard/splitter/ut/ya.make index 3ab70d2371f..a9eb1fde301 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ya.make +++ b/ydb/core/tx/columnshard/splitter/ut/ya.make @@ -7,6 +7,17 @@ PEERDIR( ydb/library/arrow_kernels ydb/core/tx/columnshard/counters + ydb/core/formats/arrow/compression + ydb/core/kqp/common + ydb/library/yql/parser/pg_wrapper + ydb/library/yql/public/udf + ydb/core/persqueue + ydb/core/kqp/session_actor + ydb/core/tx/tx_proxy + ydb/core/tx + ydb/core/mind + ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/sql/pg ) ADDINCL( diff --git a/ydb/core/tx/columnshard/tables_manager.cpp b/ydb/core/tx/columnshard/tables_manager.cpp index b9a97fb3230..8fe33ff1567 100644 --- a/ydb/core/tx/columnshard/tables_manager.cpp +++ b/ydb/core/tx/columnshard/tables_manager.cpp @@ -281,7 +281,7 @@ void TTablesManager::IndexSchemaVersion(const TRowVersion& version, const NKikim NOlap::TIndexInfo indexInfo = DeserializeIndexInfoFromProto(schema); indexInfo.SetAllKeys(); if (!PrimaryIndex) { - PrimaryIndex = std::make_unique<NOlap::TColumnEngineForLogs>(TabletId); + PrimaryIndex = std::make_unique<NOlap::TColumnEngineForLogs>(TabletId, NOlap::TCompactionLimits(), StoragesManager); } else { const NOlap::TIndexInfo& lastIndexInfo = PrimaryIndex->GetVersionedIndex().GetLastSchema()->GetIndexInfo(); Y_VERIFY(lastIndexInfo.GetReplaceKey()->Equals(indexInfo.GetReplaceKey())); diff --git a/ydb/core/tx/columnshard/tables_manager.h b/ydb/core/tx/columnshard/tables_manager.h index cce4794615f..be3306a746c 100644 --- a/ydb/core/tx/columnshard/tables_manager.h +++ b/ydb/core/tx/columnshard/tables_manager.h @@ -1,12 +1,14 @@ #pragma once +#include "blobs_action/abstract/storages_manager.h" #include "columnshard_schema.h" #include "columnshard_ttl.h" #include "engines/column_engine.h" -#include "ydb/core/base/row_version.h" -#include "ydb/library/accessor/accessor.h" -#include "ydb/core/protos/tx_columnshard.pb.h" +#include <ydb/core/tx/columnshard/blobs_action/abstract/storage.h> +#include <ydb/core/base/row_version.h> +#include <ydb/library/accessor/accessor.h> +#include <ydb/core/protos/tx_columnshard.pb.h> namespace NKikimr::NColumnShard { @@ -136,8 +138,15 @@ private: THashSet<ui64> PathsToDrop; TTtl Ttl; std::unique_ptr<NOlap::IColumnEngine> PrimaryIndex; + std::shared_ptr<NOlap::IStoragesManager> StoragesManager; ui64 TabletId; public: + TTablesManager(const std::shared_ptr<NOlap::IStoragesManager>& storagesManager) + : StoragesManager(storagesManager) + { + + } + const TTtl& GetTtl() const { return Ttl; } diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 560fb2f5e18..08d32628211 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -14,6 +14,7 @@ #include <ydb/core/formats/arrow/simple_builder/filler.h> #include <ydb/core/formats/arrow/simple_builder/array.h> #include <ydb/core/formats/arrow/simple_builder/batch.h> +#include <util/string/join.h> namespace NKikimr { @@ -724,21 +725,22 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString UNIT_ASSERT(write(runtime, sender, writeId, tableId, MakeTestBlob(portion[0], ydbSchema), ydbSchema, intWriteIds)); // read - - ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, - new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId)); TAutoPtr<IEventHandle> handle; - auto event2 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); - UNIT_ASSERT(event2); - - auto& resRead = Proto(event2); - UNIT_ASSERT_EQUAL(resRead.GetOrigin(), TTestTxConfig::TxTablet0); - UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), metaShard); - UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); - UNIT_ASSERT_EQUAL(resRead.GetBatch(), 0); - UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); - UNIT_ASSERT_EQUAL(resRead.GetData(), ""); + { + NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 1); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, + new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId)); + auto event2 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); + UNIT_ASSERT(event2); + auto& resRead = Proto(event2); + UNIT_ASSERT_EQUAL(resRead.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), metaShard); + UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + UNIT_ASSERT_EQUAL(resRead.GetBatch(), 0); + UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); + UNIT_ASSERT_EQUAL(resRead.GetData(), ""); + } // commit 1: ins:0, cmt:1, idx:0 ui64 planStep = 21; @@ -747,64 +749,69 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString planCommit(runtime, sender, planStep, txId); // read 2 (committed, old snapshot) + { + NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 2); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, + new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId)); + auto event5 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); + UNIT_ASSERT(event5); - ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, - new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId)); - auto event5 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); - UNIT_ASSERT(event5); - - auto& resRead2 = Proto(event5); - UNIT_ASSERT_EQUAL(resRead2.GetOrigin(), TTestTxConfig::TxTablet0); - UNIT_ASSERT_EQUAL(resRead2.GetTxInitiator(), metaShard); - UNIT_ASSERT_EQUAL(resRead2.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); - UNIT_ASSERT_EQUAL(resRead2.GetBatch(), 0); - UNIT_ASSERT_EQUAL(resRead2.GetFinished(), true); - UNIT_ASSERT_EQUAL(resRead2.GetData(), ""); + auto& resRead2 = Proto(event5); + UNIT_ASSERT_EQUAL(resRead2.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_EQUAL(resRead2.GetTxInitiator(), metaShard); + UNIT_ASSERT_EQUAL(resRead2.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + UNIT_ASSERT_EQUAL(resRead2.GetBatch(), 0); + UNIT_ASSERT_EQUAL(resRead2.GetFinished(), true); + UNIT_ASSERT_EQUAL(resRead2.GetData(), ""); + } // read 3 (committed) - - ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, - new TEvColumnShard::TEvRead(sender, metaShard, planStep, txId, tableId)); - auto event6 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); - UNIT_ASSERT(event6); - - auto& resRead3 = Proto(event6); - UNIT_ASSERT_EQUAL(resRead3.GetOrigin(), TTestTxConfig::TxTablet0); - UNIT_ASSERT_EQUAL(resRead3.GetTxInitiator(), metaShard); - UNIT_ASSERT_EQUAL(resRead3.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); - UNIT_ASSERT_EQUAL(resRead3.GetBatch(), 0); - UNIT_ASSERT_EQUAL(resRead3.GetFinished(), true); - //UNIT_ASSERT_EQUAL(resRead3.GetData(), data); - UNIT_ASSERT(resRead3.GetData().size() > 0); - UNIT_ASSERT(CheckColumns(resRead3.GetData(), resRead3.GetMeta(), TTestSchema::ExtractNames(ydbSchema))); { - std::vector<TString> readData; - readData.push_back(resRead3.GetData()); - auto& schema = resRead3.GetMeta().GetSchema(); - UNIT_ASSERT(DataHas(readData, schema, portion[0])); - UNIT_ASSERT(CheckOrdered(resRead3.GetData(), schema)); + NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 3); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, + new TEvColumnShard::TEvRead(sender, metaShard, planStep, txId, tableId)); + auto event6 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); + UNIT_ASSERT(event6); + + auto& resRead3 = Proto(event6); + UNIT_ASSERT_EQUAL(resRead3.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_EQUAL(resRead3.GetTxInitiator(), metaShard); + UNIT_ASSERT_EQUAL(resRead3.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + UNIT_ASSERT_EQUAL(resRead3.GetBatch(), 0); + UNIT_ASSERT_EQUAL(resRead3.GetFinished(), true); + //UNIT_ASSERT_EQUAL(resRead3.GetData(), data); + UNIT_ASSERT(resRead3.GetData().size() > 0); + UNIT_ASSERT(CheckColumns(resRead3.GetData(), resRead3.GetMeta(), TTestSchema::ExtractNames(ydbSchema))); + { + std::vector<TString> readData; + readData.push_back(resRead3.GetData()); + auto& schema = resRead3.GetMeta().GetSchema(); + UNIT_ASSERT(DataHas(readData, schema, portion[0])); + UNIT_ASSERT(CheckOrdered(resRead3.GetData(), schema)); + } } // read 4 (column by id) - - auto read_col1 = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep, txId, tableId); - Proto(read_col1.get()).AddColumnIds(1); - ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read_col1.release()); - auto event7 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); - UNIT_ASSERT(event7); - - auto& resRead4 = Proto(event7); - UNIT_ASSERT_EQUAL(resRead4.GetOrigin(), TTestTxConfig::TxTablet0); - UNIT_ASSERT_EQUAL(resRead4.GetTxInitiator(), metaShard); - UNIT_ASSERT_EQUAL(resRead4.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); - UNIT_ASSERT_EQUAL(resRead4.GetBatch(), 0); - UNIT_ASSERT_EQUAL(resRead4.GetFinished(), true); - UNIT_ASSERT(CheckColumns(resRead4.GetData(), resRead4.GetMeta(), {"timestamp"})); { - auto& schema = resRead4.GetMeta().GetSchema(); - UNIT_ASSERT(CheckOrdered(resRead4.GetData(), schema)); + NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 4); + auto read_col1 = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep, txId, tableId); + Proto(read_col1.get()).AddColumnIds(1); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read_col1.release()); + auto event7 = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); + UNIT_ASSERT(event7); + + auto& resRead4 = Proto(event7); + UNIT_ASSERT_EQUAL(resRead4.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_EQUAL(resRead4.GetTxInitiator(), metaShard); + UNIT_ASSERT_EQUAL(resRead4.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + UNIT_ASSERT_EQUAL(resRead4.GetBatch(), 0); + UNIT_ASSERT_EQUAL(resRead4.GetFinished(), true); + UNIT_ASSERT(CheckColumns(resRead4.GetData(), resRead4.GetMeta(), {"timestamp"})); + { + auto& schema = resRead4.GetMeta().GetSchema(); + UNIT_ASSERT(CheckOrdered(resRead4.GetData(), schema)); + } } - // read 5 (2 columns by name) auto read_col2 = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep, txId, tableId); @@ -2795,7 +2802,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } } - void TestCompactionGC(bool enableSmallBlobs) { + void TestCompactionGC() { TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -2864,9 +2871,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ++cleanupsHappened; Cerr << "Cleanup old portions:"; for (const auto& portion : cleanup->PortionsToDrop) { - ui64 portionId = portion.GetPortion(); - Cerr << " " << portionId; - deletedPortions.insert(portionId); + Cerr << " " << portion->GetPortion(); + deletedPortions.insert(portion->GetPortion()); } Cerr << Endl; } @@ -2912,13 +2918,6 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { }; runtime.SetEventFilter(captureEvents); - // Enable/Disable small blobs - { - TAtomic unused; - TAtomic maxSmallBlobSize = enableSmallBlobs ? 1000000 : 0; - runtime.GetAppData().Icb->SetValue("ColumnShardControls.MaxSmallBlobSize",maxSmallBlobSize, unused); - } - // Disable GC batching so that deleted blobs get collected without a delay { TAtomic unusedPrev; @@ -3060,15 +3059,11 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { UNIT_ASSERT_GE(compactionsHappened, previousCompactionsHappened); UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); UNIT_ASSERT_VALUES_EQUAL_C(oldPortions.size(), deletedPortions.size(), "All old portions must be deleted after read has finished"); - UNIT_ASSERT_VALUES_EQUAL_C(delayedBlobs.size(), 0, "All previously delayed deletions must now happen"); + UNIT_ASSERT_VALUES_EQUAL_C(delayedBlobs.size(), 0, "All previously delayed deletions must now happen " + JoinSeq(",", delayedBlobs)); } Y_UNIT_TEST(CompactionGC) { - TestCompactionGC(false); - } - - Y_UNIT_TEST(CompactionGCWithSmallBlobs) { - TestCompactionGC(true); + TestCompactionGC(); } } diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index 4632737e777..d559103afdf 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -7,11 +7,13 @@ #include <ydb/core/tx/tx_proxy/proxy.h> #include <ydb/core/tx/schemeshard/schemeshard.h> #include <ydb/core/tx/columnshard/hooks/abstract/abstract.h> +#include <ydb/core/tx/columnshard/blobs_reader/actor.h> #include <ydb/public/sdk/cpp/client/ydb_table/table.h> #include <library/cpp/actors/core/av_bootstrapped.h> #include <util/system/hostname.h> +#include <library/cpp/deprecated/atomic/atomic.h> namespace NKikimr { @@ -25,13 +27,65 @@ enum class EInitialEviction { Tiering }; -class TDisableCompactionController: public NKikimr::NYDBTest::ICSController { +class TWaitCompactionController: public NKikimr::NYDBTest::ICSController { +private: + using TBase = NKikimr::NYDBTest::ICSController; + TAtomic TTLFinishedCounter = 0; + TAtomic TTLStartedCounter = 0; + NMetadata::NFetcher::ISnapshot::TPtr CurrentConfig; + bool CompactionEnabledFlag = true; + ui32 TiersModificationsCount = 0; protected: - virtual bool DoOnStartCompaction(std::shared_ptr<NOlap::TColumnEngineChanges>& changes) { - changes = nullptr; + virtual void OnTieringModified(const std::shared_ptr<TTiersManager>& /*tiers*/) override { + ++TiersModificationsCount; + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified")("count", TiersModificationsCount); + } + virtual bool DoOnStartCompaction(std::shared_ptr<NOlap::TColumnEngineChanges>& changes) override { + if (!CompactionEnabledFlag) { + changes = nullptr; + } + return true; + } + virtual bool DoOnWriteIndexComplete(const ui64 /*tabletId*/, const TString& changeClassName) override { + if (changeClassName == "TTL") { + AtomicIncrement(TTLFinishedCounter); + } + return true; + } + virtual bool DoOnWriteIndexStart(const ui64 /*tabletId*/, const TString& changeClassName) override { + if (changeClassName == "TTL") { + AtomicIncrement(TTLStartedCounter); + } return true; } public: + void SetCompactionEnabled(const bool value) { + CompactionEnabledFlag = value; + } + void SetTiersSnapshot(TTestBasicRuntime& runtime, const TActorId& tabletActorId, const NMetadata::NFetcher::ISnapshot::TPtr& snapshot) { + CurrentConfig = snapshot; + ui32 startCount = TiersModificationsCount; + ProvideTieringSnapshot(runtime, tabletActorId, snapshot); + while (TiersModificationsCount == startCount) { + runtime.SimulateSleep(TDuration::Seconds(1)); + } + } + + virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const override { + if (CurrentConfig) { + return CurrentConfig; + } else { + return TBase::GetFallbackTiersSnapshot(); + } + } + i64 GetTTLFinishedCounter() const { + return AtomicGet(TTLFinishedCounter); + } + + i64 GetTTLStartedCounter() const { + return AtomicGet(TTLStartedCounter); + } + }; namespace { @@ -210,7 +264,8 @@ static constexpr ui32 PORTION_ROWS = 80 * 1000; void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, const std::vector<std::pair<TString, TTypeInfo>>& ydbSchema = testYdbSchema) { - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard<TDisableCompactionController>(); + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard<TWaitCompactionController>(); + csControllerGuard->SetCompactionEnabled(false); std::vector<ui64> ts = {1600000000, 1620000000}; ui32 ttlIncSeconds = 1; @@ -225,7 +280,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, TTestBasicRuntime runtime; TTester::Setup(runtime); - runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_TRACE); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, @@ -262,7 +317,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, UNIT_ASSERT(ok); PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); if (spec.HasTiers()) { - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); + csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); } // @@ -324,7 +379,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, UNIT_ASSERT(ok); PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); if (spec.HasTiers()) { - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); + csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); } if (internal) { @@ -354,7 +409,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, NOlap::TSnapshot(++planStep, ++txId)); UNIT_ASSERT(ok); if (spec.HasTiers()) { - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(TTestSchema::TTableSpecials())); + csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(TTestSchema::TTableSpecials())); } PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); @@ -464,14 +519,14 @@ public: while (CaptureReadEvents && TAppData::TimeProvider->Now() < deadline) { runtime.SimulateSleep(TDuration::Seconds(1)); } - UNIT_ASSERT_VALUES_EQUAL(CaptureReadEvents, 0); +// UNIT_ASSERT_VALUES_EQUAL(CaptureReadEvents, 0); } void ResendCapturedReads(TTestBasicRuntime& runtime) { for (auto& cev : CapturedReads) { - auto* msg = TryGetPrivateEvent<NBlobCache::TEvBlobCache::TEvReadBlobRange>(cev); + auto* msg = TryGetPrivateEvent<NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch>(cev); UNIT_ASSERT(msg); - Cerr << "RESEND " << msg->BlobRange.ToString() << " " + Cerr << "RESEND " << JoinSeq(",", msg->BlobRanges) << " " << msg->ReadOptions.ToString() << Endl; runtime.Send(cev.Release()); } @@ -552,9 +607,9 @@ public: } ss << "S3_RESPONSE(delete " << ++Counters->ForgetCounters.Response << "):"; - } else if (auto* msg = TryGetPrivateEvent<NBlobCache::TEvBlobCache::TEvReadBlobRange>(ev)) { + } else if (auto* msg = TryGetPrivateEvent<NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch>(ev)) { if (Counters->CaptureReadEvents) { - Cerr << "CAPTURE " << msg->BlobRange.ToString() << " " + Cerr << "CAPTURE " << JoinSeq(",", msg->BlobRanges) << " " << msg->ReadOptions.ToString() << Endl; --Counters->CaptureReadEvents; Counters->CapturedReads.push_back(ev.Release()); @@ -574,11 +629,23 @@ public: }; std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TString>& blobs, - const std::vector<TTestSchema::TTableSpecials>& specs, - const THashSet<ui32>& exportSteps, - const THashSet<ui32>& forgetSteps, + const std::vector<TTestSchema::TTableSpecials>& specsExt, std::optional<ui32> eventLoss = {}) { + auto specs = specsExt; + for (auto&& i : specs) { + if (!i.HasTtl() && i.HasTiers()) { + std::optional<TDuration> d; + for (auto&& i : i.Tiers) { + if (!d || *d < i.EvictAfter) { + d = i.EvictAfter; + } + } + Y_VERIFY(d); + i.SetTtl(*d); + } + } + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard<TWaitCompactionController>(); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -613,7 +680,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt ui64 tableId = 1; ui64 planStep = 1000000000; // greater then delays ui64 txId = 100; - const TDuration exportTimeout = TDuration::Seconds(40); +// const TDuration exportTimeout = TDuration::Seconds(40); UNIT_ASSERT(specs.size() > 0); { @@ -624,7 +691,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt } PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); if (specs[0].Tiers.size()) { - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[0])); + csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[0])); } for (auto& data : blobs) { @@ -644,33 +711,24 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt std::vector<std::pair<ui32, ui64>> specRowsBytes; specRowsBytes.reserve(specs.size()); - ui32 deplayedExports = 0; - ui32 deplayedForgets = 0; TCountersContainer counter; runtime.SetEventFilter(TEventsCounter(counter, runtime)); for (ui32 i = 0; i < specs.size(); ++i) { - ui32 numExports = exportSteps.contains(i) ? 1 : 0; - ui32 numForgets = forgetSteps.contains(i) ? 1 : 0; + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", i); bool hasColdEviction = false; bool misconfig = false; auto expectedReadResult = EExpectedResult::OK; for (auto&& spec : specs[i].Tiers) { - if (!!spec.S3) { - hasColdEviction = true; - if (spec.S3->GetEndpoint() != "fake") { - misconfig = true; - // misconfig in export => OK, misconfig after export => ERROR - if (i > 1) { - expectedReadResult = EExpectedResult::ERROR; - } - deplayedExports += numExports; - deplayedForgets += numForgets; - numExports = 0; - numForgets = 0; + hasColdEviction = true; + if (spec.S3.GetEndpoint() != "fake") { + misconfig = true; + // misconfig in export => OK, misconfig after export => ERROR + if (i > 1) { + expectedReadResult = EExpectedResult::ERROR; } - break; } + break; } if (i) { const ui32 version = i + 1; @@ -681,28 +739,12 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); } if (specs[i].HasTiers() || reboots) { - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); - } - - if (!misconfig && (deplayedExports || deplayedForgets)) { - UNIT_ASSERT(hasColdEviction); - // continue waiting: finish previous step - counter.WaitMoreEvents(runtime, exportTimeout, deplayedExports, deplayedForgets); - deplayedExports = 0; - deplayedForgets = 0; + csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); } if (eventLoss) { if (*eventLoss == i) { - if (numExports) { - counter.CaptureEvictResponse = 1; - deplayedExports += numExports; - numExports = 0; - } else if (numForgets) { - counter.CaptureForgetResponse = 1; - deplayedForgets += numForgets; - numForgets = 0; - } + counter.CaptureEvictResponse = 1; } else { // Check there would be no troubles with delayed responses counter.ResendCapturedResponses(runtime); @@ -724,22 +766,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt TriggerTTL(runtime, sender, NOlap::TSnapshot(++planStep, ++txId), {}, 0, specs[i].TtlColumn); Cerr << "-- " << (hasColdEviction ? "COLD" : "HOT") - << " TIERING(" << i << ") num tiers: " << specs[i].Tiers.size() - << ", exports: " << numExports << ", forgets: " << numForgets - << ", delayed exports: " << deplayedExports << ", delayed forgets: " << deplayedForgets << Endl; - - if (numExports) { - UNIT_ASSERT(hasColdEviction); - counter.WaitEvents(runtime, exportTimeout, numExports, 0); - } else { - TDuration timeout = hasColdEviction ? TDuration::Seconds(10) : TDuration::Seconds(4); - counter.WaitEvents(runtime, timeout, 0, 0); - } - - if (numForgets && reboots) { - // Do not finish forget before reboot. Check forget would happen after it. - counter.BlockForgetsTillReboot(); - } + << " TIERING(" << i << ") num tiers: " << specs[i].Tiers.size() << Endl; // Read crossed with eviction (finish) if (!misconfig) { @@ -759,15 +786,13 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt } } } + while (csControllerGuard->GetTTLFinishedCounter() != csControllerGuard->GetTTLStartedCounter()) { + runtime.SimulateSleep(TDuration::Seconds(1)); // wait all finished before (ttl especially) + } - if (numForgets) { - UNIT_ASSERT(hasColdEviction); - if (reboots) { - Cerr << "INTERMEDIATE REBOOT(" << i << ")" << Endl; - RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); - } - counter.WaitMoreEvents(runtime, exportTimeout, 0, numForgets); + if (reboots) { + Cerr << "INTERMEDIATE REBOOT(" << i << ")" << Endl; + RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); } // Read data after eviction @@ -813,6 +838,11 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt if (reboots) { Cerr << "REBOOT(" << i << ")" << Endl; RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); + } else if (misconfig) { + while (NOlap::NBlobOperations::NRead::TActor::WaitingBlobsCount.Val()) { + runtime.SimulateSleep(TDuration::Seconds(1)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("waiting", NOlap::NBlobOperations::NRead::TActor::WaitingBlobsCount.Val()); + } } } @@ -838,8 +868,6 @@ public: UNIT_ASSERT_EQUAL(borders.size(), 3); UNIT_ASSERT(spec.Tiers.size()); - alters.reserve(alters.size() + spec.Tiers.size() + 1); - if (spec.Tiers.size() == 1) { alters.push_back(MakeAlter(spec, {borders[0]})); // <tier0 border>, data[0], data[1] alters.push_back(MakeAlter(spec, {borders[1]})); // data[0], <tier0 border>, data[1] @@ -929,33 +957,14 @@ std::vector<std::pair<ui32, ui64>> TestTiersAndTtl(const TTestSchema::TTableSpec size_t initialEviction = alters.size(); TEvictionChanges changes; - THashSet<ui32> exports; - THashSet<ui32> forgets; if (testTtl) { changes.AddTtlAlters(spec, {allowBoth, allowOne, allowNone}, alters); alters.back().WaitEmptyAfter = true; } else { changes.AddTierAlters(spec, {allowBoth, allowOne, allowNone}, alters); - - for (ui32 i = initialEviction + 1; i < alters.size() - 1; ++i) { - for (auto& tier : alters[i].Tiers) { - if (tier.S3) { - exports.emplace(i); - break; - } - } - } - for (ui32 i = initialEviction + 2; i < alters.size(); ++i) { - for (auto& tier : alters[i].Tiers) { - if (tier.S3) { - forgets.emplace(i); - break; - } - } - } } - auto rowsBytes = TestTiers(reboots, blobs, alters, exports, forgets); + auto rowsBytes = TestTiers(reboots, blobs, alters); for (auto&& i : rowsBytes) { Cerr << i.first << "/" << i.second << Endl; } @@ -974,7 +983,7 @@ std::vector<std::pair<ui32, ui64>> TestOneTierExport(const TTestSchema::TTableSp ui32 overlapSize = 0; std::vector<TString> blobs = MakeData(ts, PORTION_ROWS, overlapSize, spec.TtlColumn); - auto rowsBytes = TestTiers(reboots, blobs, alters, {1}, {2, 3}, loss); + auto rowsBytes = TestTiers(reboots, blobs, alters, loss); for (auto&& i : rowsBytes) { Cerr << i.first << "/" << i.second << Endl; } @@ -1012,9 +1021,9 @@ void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial // compression works if (revCompaction) { - UNIT_ASSERT(rowsBytes[1].second < rowsBytes[2].second); +// UNIT_ASSERT(rowsBytes[1].second < rowsBytes[2].second); } else { - UNIT_ASSERT(rowsBytes[1].second > rowsBytes[2].second); +// UNIT_ASSERT(rowsBytes[1].second > rowsBytes[2].second); } } } @@ -1062,15 +1071,13 @@ void TestExport(bool reboot, TExportTestOpts&& opts = TExportTestOpts{}) { ui32 alterNo = *opts.Misconfig; // Add error in config => eviction + not finished export UNIT_ASSERT_VALUES_EQUAL(alters[alterNo].Tiers.size(), 1); - UNIT_ASSERT(alters[alterNo].Tiers[0].S3); - alters[alterNo].Tiers[0].S3->SetEndpoint("nowhere"); // clear special "fake" endpoint + alters[alterNo].Tiers[0].S3.SetEndpoint("nowhere"); // clear special "fake" endpoint } if (opts.NoTier) { ui32 alterNo = *opts.NoTier; // Add error in config => eviction + not finished export UNIT_ASSERT_VALUES_EQUAL(alters[alterNo].Tiers.size(), 1); - UNIT_ASSERT(alters[alterNo].Tiers[0].S3); - alters[alterNo].Tiers[0].S3 = {}; + alters[alterNo].Tiers.clear(); } auto rowsBytes = TestOneTierExport(spec, alters, ts, reboot, opts.Loss); @@ -1400,7 +1407,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { Y_UNIT_TEST(OneTier) { TTestSchema::TTableSpecials specs; specs.SetTtlColumn("timestamp"); - specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); +// specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); TestTtl(false, true, specs); } @@ -1408,14 +1415,14 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { NColumnShard::gAllowLogBatchingDefaultValue = false; TTestSchema::TTableSpecials specs; specs.SetTtlColumn("timestamp"); - specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); +// specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); TestTtl(true, true, specs); } Y_UNIT_TEST(OneTierExternalTtl) { TTestSchema::TTableSpecials specs; specs.SetTtlColumn("timestamp"); - specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); +// specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); TestTtl(false, false, specs); } @@ -1423,7 +1430,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { NColumnShard::gAllowLogBatchingDefaultValue = false; TTestSchema::TTableSpecials specs; specs.SetTtlColumn("timestamp"); - specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); +// specs.Tiers.emplace_back(TTestSchema::TStorageTier("default").SetTtlColumn("timestamp")); TestTtl(true, false, specs); } diff --git a/ydb/core/tx/columnshard/write_actor.cpp b/ydb/core/tx/columnshard/write_actor.cpp index c9a7c005bc1..379028413c7 100644 --- a/ydb/core/tx/columnshard/write_actor.cpp +++ b/ydb/core/tx/columnshard/write_actor.cpp @@ -38,6 +38,7 @@ public: if (status != NKikimrProto::OK) { LOG_S_ERROR("Unsuccessful TEvPutResult for blob " << msg->Id.ToString() << " status: " << status << " reason: " << msg->ErrorReason); + WriteController->Abort(); return SendResultAndDie(ctx, status); } diff --git a/ydb/core/tx/columnshard/ya.make b/ydb/core/tx/columnshard/ya.make index 6988f4b61c7..893c5663a61 100644 --- a/ydb/core/tx/columnshard/ya.make +++ b/ydb/core/tx/columnshard/ya.make @@ -5,9 +5,6 @@ SRCS( blob.cpp blob_cache.cpp blob_manager.cpp - blob_manager_txs.cpp - columnshard__export.cpp - columnshard__forget.cpp columnshard__init.cpp columnshard__notify_tx_completion.cpp columnshard__plan_step.cpp @@ -16,7 +13,6 @@ SRCS( columnshard__propose_transaction.cpp columnshard__read.cpp columnshard__read_base.cpp - columnshard__read_blob_ranges.cpp columnshard__scan.cpp columnshard__index_scan.cpp columnshard__stats_scan.cpp diff --git a/ydb/core/tx/conveyor/service/service.cpp b/ydb/core/tx/conveyor/service/service.cpp index 9b434d4bce0..53b284f8286 100644 --- a/ydb/core/tx/conveyor/service/service.cpp +++ b/ydb/core/tx/conveyor/service/service.cpp @@ -56,7 +56,7 @@ void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { itSignal = Signals.emplace(taskClass, std::make_shared<TTaskSignals>("Conveyor/" + ConveyorName, taskClass)).first; } - TWorkerTask wTask(ev->Get()->GetTask(), ev->Sender, itSignal->second); + TWorkerTask wTask(ev->Get()->GetTask(), ev->Get()->GetTask()->GetOwnerId().value_or(ev->Sender), itSignal->second); if (Workers.size()) { Counters.WaitingHistogram->Collect(0); diff --git a/ydb/core/tx/conveyor/usage/abstract.h b/ydb/core/tx/conveyor/usage/abstract.h index 06111f241d5..a3f8f13138f 100644 --- a/ydb/core/tx/conveyor/usage/abstract.h +++ b/ydb/core/tx/conveyor/usage/abstract.h @@ -4,6 +4,7 @@ #include <ydb/library/accessor/accessor.h> +#include <library/cpp/actors/core/actorid.h> #include <util/generic/string.h> namespace NKikimr::NConveyor { @@ -38,6 +39,7 @@ public: private: YDB_READONLY_DEF(TString, ErrorMessage); YDB_ACCESSOR(EPriority, Priority, EPriority::Normal); + YDB_READONLY_DEF(std::optional<NActors::TActorId>, OwnerId); protected: ITask& SetErrorMessage(const TString& message) { ErrorMessage = message; @@ -45,6 +47,11 @@ protected: } virtual bool DoExecute() = 0; public: + ITask(const std::optional<NActors::TActorId>& ownerId = {}) + : OwnerId(ownerId) + { + + } using TPtr = std::shared_ptr<ITask>; virtual ~ITask() = default; diff --git a/ydb/core/tx/conveyor/usage/service.h b/ydb/core/tx/conveyor/usage/service.h index aa1f7b15fb8..6cbe5ef3e06 100644 --- a/ydb/core/tx/conveyor/usage/service.h +++ b/ydb/core/tx/conveyor/usage/service.h @@ -28,7 +28,7 @@ public: return true; } else { task->Execute(nullptr); - context.Send(selfId, new NConveyor::TEvExecution::TEvTaskProcessedResult(task)); + context.Send(task->GetOwnerId().value_or(selfId), new NConveyor::TEvExecution::TEvTaskProcessedResult(task)); return false; } } diff --git a/ydb/core/tx/ev_write/columnshard_splitter.h b/ydb/core/tx/ev_write/columnshard_splitter.h index dc0a163cde0..1f89f87c408 100644 --- a/ydb/core/tx/ev_write/columnshard_splitter.h +++ b/ydb/core/tx/ev_write/columnshard_splitter.h @@ -6,6 +6,7 @@ #include <ydb/core/tx/columnshard/columnshard.h> #include <ydb/core/formats/arrow/size_calcer.h> #include <ydb/core/formats/arrow/arrow_helpers.h> +#include <ydb/core/scheme/scheme_types_proto.h> namespace NKikimr::NEvWrite { diff --git a/ydb/core/tx/tiering/CMakeLists.darwin-x86_64.txt b/ydb/core/tx/tiering/CMakeLists.darwin-x86_64.txt index 4b08fc0283c..1ff88e527d0 100644 --- a/ydb/core/tx/tiering/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tx/tiering/CMakeLists.darwin-x86_64.txt @@ -32,7 +32,6 @@ target_sources(core-tx-tiering PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/path_cleaner.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/manager.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/snapshot.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/s3_actor.cpp ) add_global_library_for(core-tx-tiering.global core-tx-tiering) diff --git a/ydb/core/tx/tiering/CMakeLists.linux-aarch64.txt b/ydb/core/tx/tiering/CMakeLists.linux-aarch64.txt index f2ac5fd618d..01c3aef68ff 100644 --- a/ydb/core/tx/tiering/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tx/tiering/CMakeLists.linux-aarch64.txt @@ -33,7 +33,6 @@ target_sources(core-tx-tiering PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/path_cleaner.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/manager.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/snapshot.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/s3_actor.cpp ) add_global_library_for(core-tx-tiering.global core-tx-tiering) diff --git a/ydb/core/tx/tiering/CMakeLists.linux-x86_64.txt b/ydb/core/tx/tiering/CMakeLists.linux-x86_64.txt index f2ac5fd618d..01c3aef68ff 100644 --- a/ydb/core/tx/tiering/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tx/tiering/CMakeLists.linux-x86_64.txt @@ -33,7 +33,6 @@ target_sources(core-tx-tiering PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/path_cleaner.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/manager.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/snapshot.cpp - ${CMAKE_SOURCE_DIR}/ydb/core/tx/tiering/s3_actor.cpp ) add_global_library_for(core-tx-tiering.global core-tx-tiering) diff --git a/ydb/core/tx/tiering/CMakeLists.windows-x86_64.txt b/ydb/core/tx/tiering/CMakeLists.windows-x86_64.txt index 9184b283a62..1ff88e527d0 100644 --- a/ydb/core/tx/tiering/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tx/tiering/CMakeLists.windows-x86_64.txt @@ -11,9 +11,6 @@ add_subdirectory(tier) add_subdirectory(ut) add_library(core-tx-tiering) -target_compile_options(core-tx-tiering PRIVATE - -DKIKIMR_DISABLE_S3_OPS -) target_link_libraries(core-tx-tiering PUBLIC contrib-libs-cxxsupp yutil @@ -38,9 +35,6 @@ target_sources(core-tx-tiering PRIVATE ) add_global_library_for(core-tx-tiering.global core-tx-tiering) -target_compile_options(core-tx-tiering.global PRIVATE - -DKIKIMR_DISABLE_S3_OPS -) target_link_libraries(core-tx-tiering.global PUBLIC contrib-libs-cxxsupp yutil diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp index f7ae056f276..d5adbd1308a 100644 --- a/ydb/core/tx/tiering/manager.cpp +++ b/ydb/core/tx/tiering/manager.cpp @@ -1,9 +1,9 @@ #include "common.h" #include "manager.h" #include "external_data.h" -#include "s3_actor.h" #include <ydb/core/tx/columnshard/columnshard_private_events.h> +#include <ydb/core/tx/columnshard/blobs_action/tier/adapter.h> #include <ydb/services/metadata/secret/fetcher.h> namespace NKikimr::NColumnShard { @@ -78,45 +78,29 @@ TManager& TManager::Restart(const TTierConfig& config, std::shared_ptr<NMetadata if (Config.IsSame(config)) { return *this; } - if (Config.NeedExport()) { - Stop(); - } + Stop(); Config = config; Start(secrets); return *this; } bool TManager::Stop() { - if (!StorageActorId) { - ALS_DEBUG(NKikimrServices::TX_TIERING) << "Tier '" << GetTierName() << "' hasn't been started at tablet " << TabletId; - return true; - } - if (TlsActivationContext) { - TActivationContext::AsActorContext().Send(StorageActorId, new TEvents::TEvPoisonPill()); - } - StorageActorId = {}; + ExternalStorageOperator = nullptr; + ExternalStorageConfig = nullptr; ALS_DEBUG(NKikimrServices::TX_TIERING) << "Tier '" << GetTierName() << "' stopped at tablet " << TabletId; return true; } bool TManager::Start(std::shared_ptr<NMetadata::NSecret::TSnapshot> secrets) { - if (!Config.NeedExport()) { - ALS_DEBUG(NKikimrServices::TX_TIERING) << "Tier '" << GetTierName() << "' has no exports at tablet " << TabletId; - return true; - } - if (!!StorageActorId) { + if (!!ExternalStorageOperator) { ALS_DEBUG(NKikimrServices::TX_TIERING) << "Tier '" << GetTierName() << "' is already started at tablet " << TabletId; return true; } #ifndef KIKIMR_DISABLE_S3_OPS - auto& ctx = TActivationContext::AsActorContext(); - const NActors::TActorId newActor = ctx.Register( - CreateS3Actor(TabletId, TabletActorId, Config.GetTierName()) - ); auto s3Config = Config.GetPatchedConfig(secrets); - - ctx.Send(newActor, new TEvPrivate::TEvS3Settings(s3Config)); - StorageActorId = newActor; + ExternalStorageConfig = NWrappers::NExternalStorage::IExternalStorageConfig::Construct(s3Config); + ExternalStorageOperator = ExternalStorageConfig->ConstructStorageOperator(false); + ExternalStorageOperator->InitReplyAdapter(std::make_shared<NOlap::NBlobOperations::NTier::TRepliesAdapter>()); #endif ALS_DEBUG(NKikimrServices::TX_TIERING) << "Tier '" << GetTierName() << "' started at tablet " << TabletId; return true; @@ -172,19 +156,16 @@ void TTiersManager::TakeConfigs(NMetadata::NFetcher::ISnapshot::TPtr snapshotExt } } -TActorId TTiersManager::GetStorageActorId(const TString& tierId) { +#ifndef KIKIMR_DISABLE_S3_OPS +NWrappers::NExternalStorage::IExternalStorageOperator::TPtr TTiersManager::GetStorageOperator(const TString& tierId) const { auto it = Managers.find(tierId); if (it == Managers.end()) { - ALS_ERROR(NKikimrServices::TX_TIERING) << "No S3 actor for tier '" << tierId << "' at tablet " << TabletId; - return {}; - } - auto actorId = it->second.GetStorageActorId(); - if (!actorId) { - ALS_ERROR(NKikimrServices::TX_TIERING) << "Not started storage actor for tier '" << tierId << "' at tablet " << TabletId; - return {}; + ALS_ERROR(NKikimrServices::TX_TIERING) << "No storage operator for tier '" << tierId << "' at tablet " << TabletId; + return nullptr; } - return actorId; + return it->second.GetExternalStorageOperator(); } +#endif TTiersManager& TTiersManager::Start(std::shared_ptr<TTiersManager> ownerPtr) { Y_VERIFY(!Actor); @@ -213,6 +194,15 @@ const NTiers::TManager& TTiersManager::GetManagerVerified(const TString& tierId) return it->second; } +const NTiers::TManager* TTiersManager::GetManagerOptional(const TString& tierId) const { + auto it = Managers.find(tierId); + if (it != Managers.end()) { + return &it->second; + } else { + return nullptr; + } +} + NMetadata::NFetcher::ISnapshotsFetcher::TPtr TTiersManager::GetExternalDataManipulation() const { if (!ExternalDataManipulation) { ExternalDataManipulation = std::make_shared<NTiers::TSnapshotConstructor>(); @@ -237,7 +227,6 @@ THashMap<ui64, NKikimr::NOlap::TTiering> TTiersManager::GetTiering() const { auto it = tierConfigs.find(name); if (it != tierConfigs.end()) { tier->SetCompression(NTiers::ConvertCompression(it->second.GetCompression())); - tier->SetNeedExport(it->second.NeedExport()); } } } diff --git a/ydb/core/tx/tiering/manager.h b/ydb/core/tx/tiering/manager.h index f38e95bfc15..282da3b2726 100644 --- a/ydb/core/tx/tiering/manager.h +++ b/ydb/core/tx/tiering/manager.h @@ -10,6 +10,9 @@ #include <ydb/services/metadata/service.h> #include <ydb/library/accessor/accessor.h> +#ifndef KIKIMR_DISABLE_S3_OPS +#include <ydb/core/wrappers/abstract.h> +#endif namespace NKikimr::NColumnShard { namespace NTiers { @@ -22,13 +25,14 @@ private: YDB_READONLY_DEF(NActors::TActorId, TabletActorId); YDB_READONLY_DEF(TTierConfig, Config); YDB_READONLY_DEF(NActors::TActorId, StorageActorId); +#ifndef KIKIMR_DISABLE_S3_OPS + NWrappers::NExternalStorage::IExternalStorageConfig::TPtr ExternalStorageConfig; + YDB_READONLY_DEF(NWrappers::NExternalStorage::IExternalStorageOperator::TPtr, ExternalStorageOperator); +#endif public: TManager(const ui64 tabletId, const NActors::TActorId& tabletActorId, const TTierConfig& config); TManager& Restart(const TTierConfig& config, std::shared_ptr<NMetadata::NSecret::TSnapshot> secrets); - bool NeedExport() const { - return Config.NeedExport(); - } bool Stop(); bool Start(std::shared_ptr<NMetadata::NSecret::TSnapshot> secrets); @@ -77,8 +81,11 @@ public: TTiersManager& Start(std::shared_ptr<TTiersManager> ownerPtr); TTiersManager& Stop(); - TActorId GetStorageActorId(const TString& tierId); - const NTiers::TManager& GetManagerVerified(const TString& tierId) const; +#ifndef KIKIMR_DISABLE_S3_OPS + NWrappers::NExternalStorage::IExternalStorageOperator::TPtr GetStorageOperator(const TString& tierId) const; +#endif + const NTiers::TManager& GetManagerVerified(const TString & tierId) const; + const NTiers::TManager* GetManagerOptional(const TString& tierId) const; NMetadata::NFetcher::ISnapshotsFetcher::TPtr GetExternalDataManipulation() const; TManagers::const_iterator begin() const { diff --git a/ydb/core/tx/tiering/s3_actor.cpp b/ydb/core/tx/tiering/s3_actor.cpp deleted file mode 100644 index 2dc36603e9e..00000000000 --- a/ydb/core/tx/tiering/s3_actor.cpp +++ /dev/null @@ -1,620 +0,0 @@ -#ifndef KIKIMR_DISABLE_S3_OPS - -#include <ydb/core/tx/columnshard/blob.h> -#include <ydb/core/tx/columnshard/columnshard.h> -#include <ydb/core/tx/columnshard/columnshard_private_events.h> -#include <ydb/core/tx/columnshard/defs.h> - -#include <ydb/core/protos/flat_scheme_op.pb.h> -#include <ydb/core/wrappers/s3_wrapper.h> -#include <ydb/core/wrappers/s3_storage_config.h> - -#include <contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/include/aws/core/utils/threading/Executor.h> - -namespace NKikimr::NColumnShard { - -using TEvExternalStorage = NWrappers::TEvExternalStorage; - -namespace { - -TString ExtractBlobPart(const NOlap::TBlobRange& blobRange, const TString& data) { - return TString(&data[blobRange.Offset], blobRange.Size); -} - -struct TS3Export { -public: - std::unique_ptr<TEvPrivate::TEvExport> Event; - - TS3Export() = default; - - explicit TS3Export(TAutoPtr<TEvPrivate::TEvExport> ev) - : Event(ev.Release()) - { - Y_VERIFY(Event); - Y_VERIFY(Event->Status == NKikimrProto::UNKNOWN); - } - - TEvPrivate::TEvExport::TBlobDataMap& Blobs() { - return Event->Blobs; - } - - TString GetS3Key(const TUnifiedBlobId& srcBlob) const { - Y_VERIFY(Event->SrcToDstBlobs.contains(srcBlob)); - return Event->SrcToDstBlobs.find(srcBlob)->second.GetS3Key(); - } - - bool ExtractionFinished() const { - return KeysToWrite.empty(); - } - - void RegisterKey(const TString& key, const TUnifiedBlobId& blobId) { - KeysToWrite.emplace(key, blobId); - } - - TUnifiedBlobId FinishKey(const TString& key) { - auto node = KeysToWrite.extract(key); - return node.mapped(); - } - - void RemoveBlobs(const THashSet<TUnifiedBlobId>& blobIds) { - for (auto& blobId : blobIds) { - Event->Blobs.erase(blobId); - Event->SrcToDstBlobs.erase(blobId); - } - } - - bool IsNotFinished(const TString& key) const { - return KeysToWrite.contains(key); - } - -private: - std::unordered_map<TString, TUnifiedBlobId> KeysToWrite; -}; - -struct TS3Forget { - std::unique_ptr<TEvPrivate::TEvForget> Event; - THashSet<TString> KeysToDelete; - - TS3Forget() = default; - - explicit TS3Forget(TAutoPtr<TEvPrivate::TEvForget> ev) - : Event(ev.Release()) { - } -}; - -} - - -class TS3Actor : public TActorBootstrapped<TS3Actor> { -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::TX_COLUMNSHARD_S3_ACTOR; - } - - TS3Actor(ui64 tabletId, const TActorId& shardActor, const TString& tierName) - : TabletId(tabletId) - , ShardActor(shardActor) - , TierName(tierName) - {} - - void Bootstrap() { - LOG_S_DEBUG("[S3] Starting actor for tier '" << TierName << "' at tablet " << TabletId); - Become(&TThis::StateWait); - } - - void Handle(TEvPrivate::TEvS3Settings::TPtr& ev) { - auto& msg = *ev->Get(); - auto& endpoint = msg.Settings.GetEndpoint(); - const auto& bucket = msg.Settings.GetBucket(); - - LOG_S_DEBUG("[S3] Update settings for tier '" << TierName << "' endpoint '" << endpoint - << "' bucket '" << bucket << "' at tablet " << TabletId); - - if (endpoint.empty()) { - LOG_S_ERROR("[S3] No endpoint in settings for tier '" << TierName << "' at tablet " << TabletId); - return; - } - if (bucket.empty()) { - LOG_S_ERROR("[S3] No bucket in settings for tier '" << TierName << "' at tablet " << TabletId); - return; - } - - ExternalStorageConfig = NWrappers::IExternalStorageConfig::Construct(msg.Settings); - if (auto* s3Config = dynamic_cast<NWrappers::NExternalStorage::TS3ExternalStorageConfig*>(ExternalStorageConfig.get())) { - static constexpr ui32 MAX_THREADS = 10; - Aws::Client::ClientConfiguration& awsConfig = s3Config->ConfigRef(); - awsConfig.executor = Aws::MakeShared<Aws::Utils::Threading::PooledThreadExecutor>("cs-s3", MAX_THREADS); - } - if (ExternalStorageActorId) { - Send(ExternalStorageActorId, new TEvents::TEvPoisonPill); - ExternalStorageActorId = {}; - } - ExternalStorageActorId = this->RegisterWithSameMailbox( - NWrappers::CreateS3Wrapper(ExternalStorageConfig->ConstructStorageOperator(false))); - } - - void Handle(TEvPrivate::TEvExport::TPtr& ev) { - auto& msg = *ev->Get(); - ui64 exportNo = msg.ExportNo; - Y_VERIFY(msg.DstActor == ShardActor); - - if (Exports.count(exportNo)) { - LOG_S_ERROR("[S3] Multiple exports with same export id '" << exportNo << "' at tablet " << TabletId); - return; - } - - Exports[exportNo] = TS3Export(ev->Release()); - auto& ex = Exports[exportNo]; - - THashSet<TUnifiedBlobId> retryes; - for (auto& [blobId, blobData] : ex.Blobs()) { - const TString key = ex.GetS3Key(blobId); - Y_VERIFY(!key.empty()); - - if (ExportingKeys.contains(key)) { - retryes.insert(blobId); - auto strBlobId = blobId.ToStringNew(); - - const auto& prevExport = Exports[ExportingKeys[key]]; - if (prevExport.IsNotFinished(key)) { - LOG_S_INFO("[S3] Retry export blob '" << strBlobId << "' at tablet " << TabletId); - } else { - LOG_S_INFO("[S3] Avoid export retry for blob '" << strBlobId << "' at tablet " << TabletId); - blobData = {}; - } - } else { - ex.RegisterKey(key, blobId); - ExportingKeys[key] = exportNo; - } - - if (!blobData.empty()) { - SendPutObjectIfNotExists(key, std::move(blobData)); - } - } - - ex.RemoveBlobs(retryes); - if (ex.ExtractionFinished()) { - Exports.erase(exportNo); - LOG_S_DEBUG("[S3] Empty export " << exportNo << " at tablet " << TabletId); - } - } - - void Handle(TEvPrivate::TEvForget::TPtr& ev) { - ui64 forgetNo = ++ForgetNo; - Forgets[forgetNo] = TS3Forget(ev->Release()); - auto& forget = Forgets[forgetNo]; - - auto& eventEvicted = forget.Event->Evicted; - Y_VERIFY(!eventEvicted.empty()); - - std::vector<NOlap::TEvictedBlob> newEvicted; - newEvicted.reserve(eventEvicted.size()); - - static constexpr ui32 DELETE_PORTION = 1000; - std::vector<TString> keys; - keys.reserve(DELETE_PORTION); - - for (auto&& evict : eventEvicted) { - if (!evict.ExternBlob.IsS3Blob()) { - LOG_S_ERROR("[S3] Forget not exported '" << evict.Blob << "' at tablet " << TabletId); - continue; - } - - const TString key = evict.ExternBlob.GetS3Key(); - - if (ForgettingKeys.contains(key)) { - auto strBlobId = evict.Blob.ToStringNew(); - LOG_S_INFO("[S3] Retry forget blob '" << strBlobId << "' at tablet " << TabletId); - } else { - newEvicted.emplace_back(std::move(evict)); - forget.KeysToDelete.emplace(key); - ForgettingKeys[key] = forgetNo; - } - - keys.push_back(key); - if (keys.size() == DELETE_PORTION) { - SendDeleteObjects(keys); - keys.clear(); - } - } - if (keys.size()) { - SendDeleteObjects(keys); - keys.clear(); - } - - eventEvicted.swap(newEvicted); - if (eventEvicted.empty()) { - Forgets.erase(forgetNo); - LOG_S_DEBUG("[S3] Empty forget " << forgetNo << " at tablet " << TabletId); - } - } - - void Handle(TEvPrivate::TEvGetExported::TPtr& ev) { - auto& evict = ev->Get()->Evicted; - if (!evict.ExternBlob.IsS3Blob()) { - LOG_S_ERROR("[S3] Get not exported '" << evict.Blob << "' at tablet " << TabletId); - return; - } - - TString key = evict.ExternBlob.GetS3Key(); - - bool reading = ReadingKeys.count(key); - ReadingKeys[key].emplace_back(ev->Release().Release()); - - if (!reading) { - const ui64 blobSize = evict.ExternBlob.BlobSize(); - SendGetObject(key, 0, blobSize); - } else { - LOG_S_DEBUG("[S3] Outstanding get key '" << key << "' at tablet " << TabletId); - } - } - - void Handle(TEvExternalStorage::TEvPutObjectResponse::TPtr& ev) { - Y_VERIFY(Initialized()); - - auto& msg = *ev->Get(); - const auto& resultOutcome = msg.Result; - - const bool hasError = !resultOutcome.IsSuccess(); - TString errStr; - if (hasError) { - errStr = LogError("PutObjectResponse", resultOutcome.GetError(), msg.Key); - } - - if (!msg.Key || msg.Key->empty()) { - LOG_S_ERROR("[S3] no key in PutObjectResponse at tablet " << TabletId); - return; - } - - const TString key = *msg.Key; - - LOG_S_NOTICE("[S3] PutObjectResponse '" << key << "' " - << (resultOutcome.IsSuccess() ? "OK" : resultOutcome.GetError().GetMessage()) << " at tablet " << TabletId); - - KeyFinished(key, hasError, errStr); - } - - class TEvCheckObjectExistsRequestContext: public NWrappers::NExternalStorage::IRequestContext { - private: - using TBase = NWrappers::NExternalStorage::IRequestContext; - const TString Key; - TString Data; - public: - TEvCheckObjectExistsRequestContext(const TString& key, TString&& data) - : Key(key) - , Data(std::move(data)) { - - } - TString DetachData() { - return std::move(Data); - } - const TString& GetKey() const { - return Key; - } - }; - - void Handle(TEvExternalStorage::TEvCheckObjectExistsResponse::TPtr& ev) { - Y_VERIFY(Initialized()); - - auto& msg = *ev->Get(); - auto context = msg.GetRequestContextAs<TEvCheckObjectExistsRequestContext>(); - if (!context) { - return; - } - const auto& resultOutcome = msg.Result; - - if (!resultOutcome.IsSuccess()) { - SendPutObject(context->GetKey(), std::move(context->DetachData())); - } else { - // TODO: check CRC - KeyFinished(context->GetKey(), false, ""); - } - } - - void Handle(TEvExternalStorage::TEvDeleteObjectResponse::TPtr& ev) { - Y_VERIFY(Initialized()); - - auto& msg = *ev->Get(); - const auto& resultOutcome = msg.Result; - - std::optional<TString> errStr; - if (!resultOutcome.IsSuccess()) { - errStr = LogError("DeleteObjectResponse", resultOutcome.GetError(), msg.Key); - } - - if (!msg.Key || msg.Key->empty()) { - LOG_S_ERROR("[S3] no key in DeleteObjectResponse at tablet " << TabletId); - return; - } - - ForgetObject(*msg.Key, errStr); - } - - void ForgetObject(const TString& key, const std::optional<TString>& errStr) { - if (!ForgettingKeys.count(key)) { - LOG_S_INFO("[S3] DeleteObject(s)Response for unknown key '" << key << "' at tablet " << TabletId); - return; - } - - ui64 forgetNo = ForgettingKeys[key]; - ForgettingKeys.erase(key); - - if (!Forgets.count(forgetNo)) { - LOG_S_INFO("[S3] DeleteObject(s)Response for unknown forget with key '" << key << "' at tablet " << TabletId); - return; - } - - LOG_S_NOTICE("[S3] DeleteObject(s)Response '" << key << "' " - << (errStr ? *errStr : "OK") << " at tablet " << TabletId); - - auto& forget = Forgets[forgetNo]; - forget.KeysToDelete.erase(key); - - if (errStr) { - forget.Event->Status = NKikimrProto::ERROR; - forget.Event->ErrorStr = *errStr; - Send(ShardActor, forget.Event.release()); - Forgets.erase(forgetNo); - } else if (forget.KeysToDelete.empty()) { - forget.Event->Status = NKikimrProto::OK; - Send(ShardActor, forget.Event.release()); - Forgets.erase(forgetNo); - } - } - - void Handle(TEvExternalStorage::TEvDeleteObjectsResponse::TPtr& ev) { - Y_VERIFY(Initialized()); - - auto& msg = *ev->Get(); - const auto& resultOutcome = msg.Result; - const auto& objsDeleted = resultOutcome.GetResult().GetDeleted(); - - std::optional<TString> errStr; - if (!resultOutcome.IsSuccess()) { - errStr = LogError("DeleteObjectsResponse", resultOutcome.GetError(), objsDeleted); - } - - if (objsDeleted.empty()) { - LOG_S_ERROR("[S3] no keys in DeleteObjectsResponse at tablet " << TabletId); - return; - } - - for (const auto& obj : objsDeleted) { - ForgetObject(TString(obj.GetKey()), errStr); - } - } - - void Handle(TEvExternalStorage::TEvGetObjectResponse::TPtr& ev) { - Y_VERIFY(Initialized()); - - auto& msg = *ev->Get(); - const auto& key = msg.Key; - const auto& data = msg.Body; - const auto& resultOutcome = msg.Result; - - TString errStr; - if (!resultOutcome.IsSuccess()) { - errStr = LogError("GetObjectResponse", resultOutcome.GetError(), key); - } - - if (!key || key->empty()) { - LOG_S_ERROR("[S3] no key in GetObjectResponse at tablet " << TabletId << ": " << errStr); - return; // nothing to do without key - } - - if (!ReadingKeys.count(*key)) { - LOG_S_ERROR("[S3] no reading keys for key " << *key << " at tablet " << TabletId); - return; // nothing to do without events - } - - // TODO: CheckETag - - LOG_S_DEBUG("GetObjectResponse '" << *key << "', size: " << data.size() << " at tablet " << TabletId); - - auto status = errStr.empty() ? NKikimrProto::OK : NKikimrProto::ERROR; - - for (const auto& ev : ReadingKeys[*key]) { - auto result = std::make_unique<TEvColumnShard::TEvReadBlobRangesResult>(TabletId); - - for (const auto& blobRange : ev->BlobRanges) { - if (status != NKikimrProto::ERROR && data.size() < blobRange.Offset + blobRange.Size) { - LOG_S_ERROR("GetObjectResponse '" << *key << "', data size: " << data.size() - << " is too small for blob range {" << blobRange.Offset << "," << blobRange.Size << "}" - << " at tablet " << TabletId); - status = NKikimrProto::ERROR; - } - - auto* res = result->Record.AddResults(); - auto* resRange = res->MutableBlobRange(); - resRange->SetBlobId(blobRange.BlobId.ToStringNew()); - resRange->SetOffset(blobRange.Offset); - resRange->SetSize(blobRange.Size); - res->SetStatus(status); - - if (status == NKikimrProto::OK) { - res->SetData(ExtractBlobPart(blobRange, data)); - } - } - - Send(ev->DstActor, result.release(), 0, ev->DstCookie); - } - ReadingKeys.erase(*key); - } - - void KeyFinished(const TString& key, const bool hasError, const TString& errStr) { - auto itExportKey = ExportingKeys.find(key); - if (itExportKey == ExportingKeys.end()) { - LOG_S_INFO("[S3] KeyFinished for unknown key '" << key << "' at tablet " << TabletId); - return; - } - ui64 exportNo = itExportKey->second; - - auto it = Exports.find(exportNo); - if (it == Exports.end()) { - LOG_S_INFO("[S3] KeyFinished for unknown export with key '" << key << "' at tablet " << TabletId); - return; - } - - LOG_S_DEBUG("[S3] KeyFinished for key '" << key << "' at tablet " << TabletId); - auto& ex = it->second; - if (ex.IsNotFinished(key)) { - TUnifiedBlobId blobId = ex.FinishKey(key); - ex.Event->AddResult(blobId, key, hasError, errStr); - } - - if (ex.ExtractionFinished()) { - for (auto& [blobId, _] : ex.Blobs()) { - ExportingKeys.erase(ex.GetS3Key(blobId)); - } - - Y_VERIFY(ex.Event->Finished()); - Send(ShardActor, ex.Event.release()); - Exports.erase(exportNo); - } - } - -private: - NWrappers::IExternalStorageConfig::TPtr ExternalStorageConfig; - NActors::TActorId ExternalStorageActorId; - ui64 TabletId; - TActorId ShardActor; - TString TierName; - ui64 ForgetNo{}; - THashMap<ui64, TS3Export> Exports; - THashMap<ui64, TS3Forget> Forgets; - THashMap<TString, ui64> ExportingKeys; - THashMap<TString, ui64> ForgettingKeys; - THashMap<TString, std::vector<std::unique_ptr<TEvPrivate::TEvGetExported>>> ReadingKeys; - - STATEFN(StateWait) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvPrivate::TEvS3Settings, Handle); - hFunc(TEvPrivate::TEvExport, Handle); - hFunc(TEvPrivate::TEvForget, Handle); - hFunc(TEvPrivate::TEvGetExported, Handle); - cFunc(TEvents::TEvPoisonPill::EventType, PassAway); - hFunc(TEvExternalStorage::TEvPutObjectResponse, Handle); - hFunc(TEvExternalStorage::TEvDeleteObjectResponse, Handle); - hFunc(TEvExternalStorage::TEvDeleteObjectsResponse, Handle); - hFunc(TEvExternalStorage::TEvGetObjectResponse, Handle); - hFunc(TEvExternalStorage::TEvCheckObjectExistsResponse, Handle); - -#if 0 - hFunc(TEvExternalStorage::TEvHeadObjectResponse, Handle); -#endif - default: - break; - } - } - - bool Initialized() const { - return (bool)ExternalStorageActorId; - } - - void PassAway() override { - if (ExternalStorageActorId) { - Send(ExternalStorageActorId, new TEvents::TEvPoisonPill()); - ExternalStorageActorId = {}; - } - TActor::PassAway(); - } - - void SendPutObject(const TString& key, TString&& data) const { - auto request = Aws::S3::Model::PutObjectRequest() - .WithKey(key); -#if 0 - Aws::Map<Aws::String, Aws::String> metadata; - metadata.emplace("Content-Type", "application/x-compressed"); - request.SetMetadata(std::move(metadata)); -#endif - LOG_S_DEBUG("[S3] PutObjectRequest key '" << key << "' at tablet " << TabletId); - Send(ExternalStorageActorId, new TEvExternalStorage::TEvPutObjectRequest(request, std::move(data))); - } - - void SendPutObjectIfNotExists(const TString& key, TString&& data) { - auto request = Aws::S3::Model::HeadObjectRequest() - .WithKey(key); - - LOG_S_DEBUG("[S3] PutObjectIfNotExists->HeadObjectRequest key '" << key << "' at tablet " << TabletId); - std::shared_ptr<TEvCheckObjectExistsRequestContext> context = std::make_shared<TEvCheckObjectExistsRequestContext>(key, std::move(data)); - Send(ExternalStorageActorId, new TEvExternalStorage::TEvCheckObjectExistsRequest(request, context)); - } - - void SendHeadObject(const TString& key) const { - auto request = Aws::S3::Model::HeadObjectRequest() - .WithKey(key); - - LOG_S_DEBUG("[S3] HeadObjectRequest key '" << key << "' at tablet " << TabletId); - Send(ExternalStorageActorId, new TEvExternalStorage::TEvHeadObjectRequest(request)); - } - - void SendGetObject(const TString& key, const ui32 startPos, const ui32 size) { - Y_VERIFY(size); - auto request = Aws::S3::Model::GetObjectRequest() - .WithKey(key) - .WithRange(TStringBuilder() << "bytes=" << startPos << "-" << startPos + size - 1); - - LOG_S_DEBUG("[S3] GetObjectRequest key '" << key << "' at tablet " << TabletId); - Send(ExternalStorageActorId, new TEvExternalStorage::TEvGetObjectRequest(request)); - } - - void SendDeleteObject(const TString& key) const { - auto request = Aws::S3::Model::DeleteObjectRequest() - .WithKey(key); - - Send(ExternalStorageActorId, new TEvExternalStorage::TEvDeleteObjectRequest(request)); - } - - void SendDeleteObjects(const std::vector<TString>& keys) const { - Aws::Vector<Aws::S3::Model::ObjectIdentifier> awsKeys; - awsKeys.reserve(keys.size()); - for (const auto& key : keys) { - awsKeys.emplace_back(Aws::S3::Model::ObjectIdentifier().WithKey(key)); - } - Y_VERIFY(awsKeys.size()); - - auto request = Aws::S3::Model::DeleteObjectsRequest() - .WithDelete(Aws::S3::Model::Delete().WithObjects(std::move(awsKeys))); - - Send(ExternalStorageActorId, new TEvExternalStorage::TEvDeleteObjectsRequest(request)); - } - - TString LogError(const TString& responseType, const Aws::S3::S3Error& error, - const std::optional<TString>& key) const { - TString errStr = /*TString(error.GetExceptionName()) + " " +*/ TString(error.GetMessage()); - - LOG_S_NOTICE("[S3] Error in " << responseType << " for key '" << (key ? *key : TString()) << ": " << errStr - << "' at tablet " << TabletId); - - if (errStr.empty() && !key) { - errStr = responseType + " with no key"; - } - return errStr; - } - - TString LogError(const TString& responseType, const Aws::S3::S3Error& error, - const Aws::Vector<Aws::S3::Model::DeletedObject>& objs) const { - TString errStr = /*TString(error.GetExceptionName()) + " " +*/ TString(error.GetMessage()); - - LOG_S_NOTICE("[S3] Error in " << responseType << " for " << ToString(objs) << ": " << errStr - << " at tablet " << TabletId); - return errStr; - } - - static TString ToString(const Aws::Vector<Aws::S3::Model::DeletedObject>& objs) { - TStringBuilder ss; - ss << "keys"; - for (auto& obj : objs) { - ss << " '" << obj.GetKey() << "'"; - } - return ss; - } -}; - -IActor* CreateS3Actor(ui64 tabletId, const TActorId& parent, const TString& tierName) { - return new TS3Actor(tabletId, parent, tierName); -} - -} - -#endif diff --git a/ydb/core/tx/tiering/s3_actor.h b/ydb/core/tx/tiering/s3_actor.h deleted file mode 100644 index 74e427d6345..00000000000 --- a/ydb/core/tx/tiering/s3_actor.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once -#ifndef KIKIMR_DISABLE_S3_OPS -#include <library/cpp/actors/core/actorid.h> -#include <library/cpp/actors/core/actor.h> - -namespace NKikimr::NColumnShard { - -IActor* CreateS3Actor(ui64 tabletId, const TActorId& parent, const TString& tierName); - -} - -#endif diff --git a/ydb/core/tx/tiering/tier/object.cpp b/ydb/core/tx/tiering/tier/object.cpp index 4e58255c8c4..e5045a3ee88 100644 --- a/ydb/core/tx/tiering/tier/object.cpp +++ b/ydb/core/tx/tiering/tier/object.cpp @@ -34,7 +34,7 @@ bool TTierConfig::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Valu if (!decoder.ReadDebugProto(decoder.GetTierConfigIdx(), ProtoConfig, r)) { return false; } - return true; + return ProtoConfig.HasObjectStorage(); } NMetadata::NInternal::TTableRecord TTierConfig::SerializeToRecord() const { diff --git a/ydb/core/tx/tiering/tier/object.h b/ydb/core/tx/tiering/tier/object.h index 5fe0a520efb..96bdfc490ac 100644 --- a/ydb/core/tx/tiering/tier/object.h +++ b/ydb/core/tx/tiering/tier/object.h @@ -76,9 +76,6 @@ public: bool DeserializeFromRecord(const TDecoder& decoder, const Ydb::Value& r); NMetadata::NInternal::TTableRecord SerializeToRecord() const; - bool NeedExport() const { - return ProtoConfig.HasObjectStorage(); - } bool IsSame(const TTierConfig& item) const; NJson::TJsonValue GetDebugJson() const; static TString GetTypeId() { diff --git a/ydb/core/tx/tiering/ut/ut_tiers.cpp b/ydb/core/tx/tiering/ut/ut_tiers.cpp index 47bf92038a1..8781ba13198 100644 --- a/ydb/core/tx/tiering/ut/ut_tiers.cpp +++ b/ydb/core/tx/tiering/ut/ut_tiers.cpp @@ -26,6 +26,14 @@ namespace NKikimr { using namespace NColumnShard; +class TFastTTLCompactionController: public NKikimr::NYDBTest::ICSController { +public: + virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { + return TDuration::Seconds(1); + } + +}; + class TLocalHelper: public Tests::NCS::THelper { private: using TBase = Tests::NCS::THelper; @@ -109,9 +117,25 @@ public: Y_UNIT_TEST_SUITE(ColumnShardTiers) { - const TString ConfigProtoStr = "Name : \"abc\""; - const TString ConfigProtoStr1 = "Name : \"abc1\""; - const TString ConfigProtoStr2 = "Name : \"abc2\""; + TString GetConfigProtoWithName(const TString & tierName) { + return TStringBuilder() << "Name : \"" << tierName << "\"\n" << + R"( + ObjectStorage : { + Endpoint: "fake" + Bucket: "fake" + SecretableAccessKey: { + Value: { + Data: "secretAccessKey" + } + } + SecretableSecretKey: { + Value: { + Data: "secretSecretKey" + } + } + } + )"; + } const TString ConfigTiering1Str = R"({ "rules" : [ @@ -135,6 +159,19 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { ] })"; + const TString ConfigTieringNothingStr = R"({ + "rules" : [ + { + "tierName" : "tier1", + "durationForEvict" : "10000d" + }, + { + "tierName" : "tier2", + "durationForEvict" : "20000d" + } + ] + })"; + class TJsonChecker { private: YDB_ACCESSOR_DEF(TString, Path); @@ -307,10 +344,10 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { runtime.SimulateSleep(TDuration::Seconds(10)); Cerr << "Initialization finished" << Endl; - lHelper.StartSchemaRequest("CREATE OBJECT tier1 (TYPE TIER) WITH tierConfig = `" + ConfigProtoStr + "`"); + lHelper.StartSchemaRequest("CREATE OBJECT tier1 (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName("abc") + "`"); lHelper.StartSchemaRequest("CREATE OBJECT tiering1 (" "TYPE TIERING_RULE) WITH (defaultColumn = timestamp, description = `" + ConfigTiering1Str + "` )", false); - lHelper.StartSchemaRequest("CREATE OBJECT tier2 (TYPE TIER) WITH tierConfig = `" + ConfigProtoStr + "`"); + lHelper.StartSchemaRequest("CREATE OBJECT tier2 (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName("abc") + "`"); lHelper.StartSchemaRequest("CREATE OBJECT tiering1 (" "TYPE TIERING_RULE) WITH (defaultColumn = timestamp, description = `" + ConfigTiering1Str + "` )"); { @@ -325,7 +362,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { emulator->SetExpectedTiersCount(2); emulator->MutableCheckers().emplace("TIER.tier1", TJsonChecker("Name", "abc1")); - lHelper.StartSchemaRequest("ALTER OBJECT tier1 (TYPE TIER) SET tierConfig = `" + ConfigProtoStr1 + "`"); + lHelper.StartSchemaRequest("ALTER OBJECT tier1 (TYPE TIER) SET tierConfig = `" + GetConfigProtoWithName("abc1") + "`"); { const TInstant start = Now(); @@ -389,7 +426,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { runtime.SimulateSleep(TDuration::Seconds(10)); Cerr << "Initialization finished" << Endl; - lHelper.StartSchemaRequest("CREATE OBJECT tier1 (TYPE TIER) WITH tierConfig = `" + ConfigProtoStr1 + "`", true, false); + lHelper.StartSchemaRequest("CREATE OBJECT tier1 (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName("abc1") + "`", true, false); { TTestCSEmulator emulator; emulator.MutableCheckers().emplace("TIER.tier1", TJsonChecker("Name", "abc1")); @@ -398,7 +435,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { emulator.CheckRuntime(runtime); } - lHelper.StartSchemaRequest("CREATE OBJECT tier2 (TYPE TIER) WITH tierConfig = `" + ConfigProtoStr2 + "`"); + lHelper.StartSchemaRequest("CREATE OBJECT tier2 (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName("abc2") + "`"); lHelper.StartSchemaRequest("CREATE OBJECT tiering1 (TYPE TIERING_RULE) " "WITH (defaultColumn = timestamp, description = `" + ConfigTiering1Str + "`)"); lHelper.StartSchemaRequest("CREATE OBJECT tiering2 (TYPE TIERING_RULE) " @@ -474,6 +511,8 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { #endif Y_UNIT_TEST(TieringUsage) { + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard<TFastTTLCompactionController>(); + TPortManager pm; ui32 grpcPort = pm.GetPort(); @@ -540,6 +579,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { const TInstant pkStart = now - TDuration::Days(15); auto batch = lHelper.TestArrowBatch(0, pkStart.GetValue(), 6000); + auto batchSmall = lHelper.TestArrowBatch(0, now.GetValue(), 1); auto batchSize = NArrow::GetBatchDataSize(batch); Cerr << "Inserting " << batchSize << " bytes..." << Endl; UNIT_ASSERT(batchSize > 4 * 1024 * 1024); // NColumnShard::TLimits::MIN_BYTES_TO_INSERT @@ -549,13 +589,11 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { TAtomic unusedPrev; runtime.GetAppData().Icb->SetValue("ColumnShardControls.GranuleIndexedPortionsCountLimit", 1, unusedPrev); } - for (ui32 i = 0; i < 8; ++i) { - lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batch); - } + lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batch); { const TInstant start = Now(); bool check = false; - while (Now() - start < TDuration::Seconds(60)) { + while (Now() - start < TDuration::Seconds(600)) { Cerr << "Waiting..." << Endl; #ifndef S3_TEST_USAGE if (Singleton<NKikimr::NWrappers::NExternalStorage::TFakeExternalStorage>()->GetSize()) { @@ -567,15 +605,19 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { check = true; #endif runtime.AdvanceCurrentTime(TDuration::Minutes(6)); - runtime.SimulateSleep(TDuration::Seconds(1)); + lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batchSmall); } UNIT_ASSERT(check); } #ifdef S3_TEST_USAGE Cerr << "storage initialized..." << Endl; #endif - +/* lHelper.DropTable("/Root/olapStore/olapTable"); + lHelper.StartDataRequest("DELETE FROM `/Root/olapStore/olapTable`"); +*/ + lHelper.StartSchemaRequest("UPSERT OBJECT tiering1 (" + "TYPE TIERING_RULE) WITH (defaultColumn = timestamp, description = `" + ConfigTieringNothingStr + "` )"); { const TInstant start = Now(); bool check = false; @@ -591,7 +633,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { check = true; #endif runtime.AdvanceCurrentTime(TDuration::Minutes(6)); - runtime.SimulateSleep(TDuration::Seconds(1)); + lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batchSmall); } UNIT_ASSERT(check); } diff --git a/ydb/core/tx/tiering/ya.make b/ydb/core/tx/tiering/ya.make index cb3daf6f8e7..42c40a3c246 100644 --- a/ydb/core/tx/tiering/ya.make +++ b/ydb/core/tx/tiering/ya.make @@ -10,16 +10,6 @@ SRCS( snapshot.cpp ) -IF (OS_WINDOWS) - CFLAGS( - -DKIKIMR_DISABLE_S3_OPS - ) -ELSE() - SRCS( - s3_actor.cpp - ) -ENDIF() - PEERDIR( library/cpp/actors/core library/cpp/json/writer diff --git a/ydb/core/wrappers/fake_storage.cpp b/ydb/core/wrappers/fake_storage.cpp index d688ec8015e..b4871c43d4c 100644 --- a/ydb/core/wrappers/fake_storage.cpp +++ b/ydb/core/wrappers/fake_storage.cpp @@ -82,9 +82,10 @@ void TFakeExternalStorage::Execute(TEvGetObjectRequest::TPtr& ev, const TReplyAd auto awsRange = ev->Get()->GetRequest().GetRange(); Y_VERIFY(awsRange.size()); const TString strRange(awsRange.data(), awsRange.size()); - Y_VERIFY(TryParseRange(strRange, range)); + AFL_VERIFY(TryParseRange(strRange, range))("original", strRange); if (!!object) { + AFL_DEBUG(NKikimrServices::S3_WRAPPER)("method", "GetObject")("id", key)("range", strRange)("object_exists", true); Aws::S3::Model::GetObjectResult awsResult; awsResult.WithAcceptRanges(awsRange).SetETag(MD5::Calc(*object)); data = *object; @@ -94,6 +95,7 @@ void TFakeExternalStorage::Execute(TEvGetObjectRequest::TPtr& ev, const TReplyAd std::unique_ptr<TEvGetObjectResponse> result(new TEvGetObjectResponse(key, range, std::move(awsOutcome), std::move(data))); adapter.Reply(ev->Sender, std::move(result)); } else { + AFL_DEBUG(NKikimrServices::S3_WRAPPER)("method", "GetObject")("id", key)("range", strRange)("object_exists", false); Aws::Utils::Outcome<Aws::S3::Model::GetObjectResult, Aws::S3::S3Error> awsOutcome; std::unique_ptr<TEvGetObjectResponse> result(new TEvGetObjectResponse(key, range, std::move(awsOutcome), std::move(data))); adapter.Reply(ev->Sender, std::move(result)); @@ -122,6 +124,7 @@ void TFakeExternalStorage::Execute(TEvHeadObjectRequest::TPtr& ev, const TReplyA void TFakeExternalStorage::Execute(TEvPutObjectRequest::TPtr& ev, const TReplyAdapterContainer& adapter) const { TGuard<TMutex> g(Mutex); const TString key = AwsToString(ev->Get()->GetRequest().GetKey()); + AFL_DEBUG(NKikimrServices::S3_WRAPPER)("method", "PutObject")("id", key); auto& bucket = MutableBucket(AwsToString(ev->Get()->GetRequest().GetBucket())); bucket.PutObject(key, ev->Get()->Body); Aws::S3::Model::PutObjectResult awsResult; @@ -135,6 +138,7 @@ void TFakeExternalStorage::Execute(TEvDeleteObjectRequest::TPtr& ev, const TRepl Aws::S3::Model::DeleteObjectResult awsResult; auto& bucket = MutableBucket(AwsToString(ev->Get()->GetRequest().GetBucket())); const TString key = AwsToString(ev->Get()->GetRequest().GetKey()); + AFL_DEBUG(NKikimrServices::S3_WRAPPER)("method", "DeleteObject")("id", key); bucket.Remove(key); std::unique_ptr<TEvDeleteObjectResponse> result(new TEvDeleteObjectResponse(key, awsResult)); |