diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2022-11-01 17:10:55 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2022-11-01 17:10:55 +0300 |
commit | 3a44612c9dbed9010f65168ae8b8bcbc855ef8f8 (patch) | |
tree | bafa65298fb8f8415e1ede4eb772791b9b1f3896 | |
parent | 1eb01e09b8655f3d6f05f761f24b0903c4457d40 (diff) | |
download | ydb-3a44612c9dbed9010f65168ae8b8bcbc855ef8f8.tar.gz |
path id usage in cold-storage-key construction as prefix
-rw-r--r-- | ydb/core/tx/columnshard/blob.cpp | 43 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blob.h | 12 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/columnshard__write_index.cpp | 29 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/columnshard_private_events.h | 6 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/column_engine.cpp | 38 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/column_engine.h | 52 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/column_engine_logs.cpp | 12 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/portion_info.h | 1 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/export_actor.cpp | 6 | ||||
-rw-r--r-- | ydb/core/tx/tiering/s3_actor.cpp | 6 |
10 files changed, 123 insertions, 82 deletions
diff --git a/ydb/core/tx/columnshard/blob.cpp b/ydb/core/tx/columnshard/blob.cpp index 9dac0226e52..c7454238bfe 100644 --- a/ydb/core/tx/columnshard/blob.cpp +++ b/ydb/core/tx/columnshard/blob.cpp @@ -8,34 +8,42 @@ namespace NKikimr::NOlap { // Format: "S3-f(logoBlobId)-group" -// Example: "S3-72075186224038245_51_31595_2_0_11952_0-2181038103" -TString DsIdToS3Key(const TUnifiedBlobId& dsid) { - TString res = TString("S3") + dsid.GetLogoBlobId().ToString(); - res[2] = '-'; // replace '[' - res[res.size() - 1] = '-'; // replace ']' - - for (size_t i = 0; i < res.size(); ++i) { - switch (res[i]) { +// Example: "S3-42-72075186224038245_51_31595_2_0_11952_0-2181038103" +TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId) { + TString blobId = dsid.GetLogoBlobId().ToString(); + for (auto&& c : blobId) { + switch (c) { case ':': - res[i] = '_'; + c = '_'; + break; + case '[': + case ']': + c = '-'; } } - - res += ToString(dsid.GetDsGroup()); - return res; + TString result = + "S3-" + + ::ToString(pathId) + + blobId + + ::ToString(dsid.GetDsGroup()) + ; + return result; } -TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error) { +TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId) { TVector<TString> keyBucket; Split(s, "-", keyBucket); ui32 dsGroup; - if (keyBucket.size() != 3 || keyBucket[0] != "S3" || !TryFromString<ui32>(keyBucket[2], dsGroup)) { + if (keyBucket.size() != 4 || keyBucket[0] != "S3" + || !TryFromString<ui32>(keyBucket[3], dsGroup) + || !TryFromString<ui64>(keyBucket[1], pathId)) + { error = TStringBuilder() << "Wrong S3 key '" << s << "'"; return TUnifiedBlobId(); } - TString blobId = "[" + keyBucket[1] + "]"; + TString blobId = "[" + keyBucket[2] + "]"; for (size_t i = 0; i < blobId.size(); ++i) { switch (blobId[i]) { case '_': @@ -134,12 +142,13 @@ TUnifiedBlobId ParseS3BlobId(const TString& s, TString& error) { return TUnifiedBlobId(); } - TUnifiedBlobId dsBlobId = S3KeyToDsId(keyBucket[0], error); + ui64 pathId; + TUnifiedBlobId dsBlobId = S3KeyToDsId(keyBucket[0], error, pathId); if (!dsBlobId.IsValid()) { return TUnifiedBlobId(); } - return TUnifiedBlobId(dsBlobId, TUnifiedBlobId::S3_BLOB, keyBucket[1]); + return TUnifiedBlobId(dsBlobId, TUnifiedBlobId::S3_BLOB, keyBucket[1], pathId); } } diff --git a/ydb/core/tx/columnshard/blob.h b/ydb/core/tx/columnshard/blob.h index 97cbb54b823..00228e7071c 100644 --- a/ydb/core/tx/columnshard/blob.h +++ b/ydb/core/tx/columnshard/blob.h @@ -9,8 +9,8 @@ namespace NKikimr::NOlap { class IBlobGroupSelector; class TUnifiedBlobId; -TString DsIdToS3Key(const TUnifiedBlobId& dsid); -TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error); +TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId); +TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId); // Encapsulates different types of blob ids to simplify dealing with blobs for the // components that do not need to know where the blob is stored @@ -92,12 +92,12 @@ class TUnifiedBlobId { TS3BlobId() = default; - TS3BlobId(const TUnifiedBlobId& dsBlob, const TString& bucket) + TS3BlobId(const TUnifiedBlobId& dsBlob, const TString& bucket, const ui64 pathId) : Bucket(bucket) { Y_VERIFY(dsBlob.IsDsBlob()); DsBlobId = std::get<TDsBlobId>(dsBlob.Id); - Key = DsIdToS3Key(dsBlob); + Key = DsIdToS3Key(dsBlob, pathId); } bool operator == (const TS3BlobId& other) const { @@ -143,8 +143,8 @@ public: {} // Make S3 blob Id from DS one - TUnifiedBlobId(const TUnifiedBlobId& blob, EBlobType type, const TString& bucket) - : Id(TS3BlobId(blob, bucket)) + TUnifiedBlobId(const TUnifiedBlobId& blob, EBlobType type, const TString& bucket, const ui64 pathId) + : Id(TS3BlobId(blob, bucket, pathId)) { Y_VERIFY(type == S3_BLOB); } diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index a972c1e7022..e6c88ef4cd9 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -23,9 +23,18 @@ public: TTxType GetTxType() const override { return TXTYPE_WRITE_INDEX; } private: + struct TPathIdBlobs { + THashSet<TUnifiedBlobId> Blobs; + ui64 PathId; + TPathIdBlobs(const ui64 pathId) + : PathId(pathId) { + + } + }; + TEvPrivate::TEvWriteIndex::TPtr Ev; - THashMap<TUnifiedBlobId, TString> BlobsToExport; - THashMap<TString, THashSet<TUnifiedBlobId>> ExportTierBlobs; + THashMap<TUnifiedBlobId, NOlap::TPortionEvictionFeatures> BlobsToExport; + THashMap<TString, TPathIdBlobs> ExportTierBlobs; THashMap<TString, std::vector<NOlap::TEvictedBlob>> TierBlobsToForget; ui64 ExportNo = 0; }; @@ -121,7 +130,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) } Self->IncCounter(COUNTER_EVICTION_PORTIONS_WRITTEN, changes->PortionsToEvict.size()); - for (auto& [portionInfo, _] : changes->PortionsToEvict) { + for (auto& [portionInfo, evictionFeatures] : changes->PortionsToEvict) { auto& tierName = portionInfo.TierName; if (tierName.empty()) { continue; @@ -133,7 +142,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) for (auto& rec : portionInfo.Records) { auto& blobId = rec.BlobRange.BlobId; if (!BlobsToExport.count(blobId)) { - BlobsToExport.emplace(blobId, tierName); + BlobsToExport.emplace(blobId, evictionFeatures); NKikimrTxColumnShard::TEvictMetadata meta; meta.SetTierName(tierName); @@ -215,8 +224,12 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) if (BlobsToExport.size()) { size_t numBlobs = BlobsToExport.size(); - for (auto& [blobId, tierName] : BlobsToExport) { - ExportTierBlobs[tierName].emplace(blobId); + for (auto& [blobId, evFeatures] : BlobsToExport) { + auto it = ExportTierBlobs.find(evFeatures.TargetTierName); + if (it == ExportTierBlobs.end()) { + it = ExportTierBlobs.emplace(evFeatures.TargetTierName, TPathIdBlobs(evFeatures.PathId)).first; + } + it->second.Blobs.emplace(blobId); } BlobsToExport.clear(); @@ -283,8 +296,8 @@ void TTxWriteIndex::Complete(const TActorContext& ctx) { Y_VERIFY(ExportNo); TEvPrivate::TEvExport::TBlobDataMap blobsData; - for (auto&& i : blobIds) { - TEvPrivate::TEvExport::TExportBlobInfo info; + for (auto&& i : blobIds.Blobs) { + TEvPrivate::TEvExport::TExportBlobInfo info(blobIds.PathId); info.Evicting = Self->BlobManager->IsEvicting(i); blobsData.emplace(i, std::move(info)); } diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index bdf7402b168..380e1dd4ffd 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -125,8 +125,14 @@ struct TEvPrivate { struct TEvExport : public TEventLocal<TEvExport, EvExport> { struct TExportBlobInfo { + const ui64 PathId = 0; TString Data; bool Evicting = false; + TExportBlobInfo(const ui64 pathId) + : PathId(pathId) + { + + } }; using TBlobDataMap = THashMap<TUnifiedBlobId, TExportBlobInfo>; diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index e8e3611ea8e..905a57f27f1 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -1,4 +1,5 @@ #include "column_engine.h" +#include <util/stream/output.h> namespace NKikimr::NOlap { @@ -10,5 +11,42 @@ TString TTiersInfo::GetDebugString() const { } return sb; } +} +template <> +void Out<NKikimr::NOlap::TColumnEngineChanges>(IOutputStream& out, TTypeTraits<NKikimr::NOlap::TColumnEngineChanges>::TFuncParam changes) { + if (ui32 switched = changes.SwitchedPortions.size()) { + out << "switch " << switched << " portions"; + for (auto& portionInfo : changes.SwitchedPortions) { + out << portionInfo; + } + out << "; "; + } + if (ui32 added = changes.AppendedPortions.size()) { + out << "add " << added << " portions"; + for (auto& portionInfo : changes.AppendedPortions) { + out << portionInfo; + } + out << "; "; + } + if (ui32 moved = changes.PortionsToMove.size()) { + out << "move " << moved << " portions"; + for (auto& [portionInfo, granule] : changes.PortionsToMove) { + out << portionInfo << " (to " << granule << ")"; + } + out << "; "; + } + if (ui32 evicted = changes.PortionsToEvict.size()) { + out << "evict " << evicted << " portions"; + for (auto& [portionInfo, evictionFeatures] : changes.PortionsToEvict) { + out << portionInfo << " (to " << evictionFeatures.TargetTierName << ")"; + } + out << "; "; + } + if (ui32 dropped = changes.PortionsToDrop.size()) { + out << "drop " << dropped << " portions"; + for (auto& portionInfo : changes.PortionsToDrop) { + out << portionInfo; + } + } } diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 2422d5ebcde..603b49f2cd5 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -101,6 +101,16 @@ struct TTiersInfo { } }; +struct TPortionEvictionFeatures { + const TString TargetTierName; + const ui64 PathId; // portion path id for cold-storage-key construct + TPortionEvictionFeatures(const TString& targetTierName, const ui64 pathId) + : TargetTierName(targetTierName) + , PathId(pathId) { + + } +}; + class TColumnEngineChanges { public: enum EType { @@ -131,7 +141,7 @@ public: TVector<TPortionInfo> SwitchedPortions; // Portions that would be replaced by new ones TVector<TPortionInfo> AppendedPortions; // New portions after indexing or compaction TVector<TPortionInfo> PortionsToDrop; - TVector<std::pair<TPortionInfo, TString>> PortionsToEvict; // {portion, target tier name} + TVector<std::pair<TPortionInfo, TPortionEvictionFeatures>> PortionsToEvict; // {portion, TPortionEvictionFeatures} TVector<TColumnRecord> EvictedRecords; TVector<std::pair<TPortionInfo, ui64>> PortionsToMove; // {portion, new granule} THashMap<TBlobRange, TString> Blobs; @@ -183,7 +193,7 @@ public: } static THashMap<TUnifiedBlobId, std::vector<TBlobRange>> - GroupedBlobRanges(const TVector<std::pair<TPortionInfo, TString>>& portions) { + GroupedBlobRanges(const TVector<std::pair<TPortionInfo, TPortionEvictionFeatures>>& portions) { Y_VERIFY(portions.size()); THashMap<TUnifiedBlobId, std::vector<TBlobRange>> sameBlobRanges; @@ -196,44 +206,6 @@ public: } return sameBlobRanges; } - - friend IOutputStream& operator << (IOutputStream& out, const TColumnEngineChanges& changes) { - if (ui32 switched = changes.SwitchedPortions.size()) { - out << "switch " << switched << " portions"; - for (auto& portionInfo : changes.SwitchedPortions) { - out << portionInfo; - } - out << "; "; - } - if (ui32 added = changes.AppendedPortions.size()) { - out << "add " << added << " portions"; - for (auto& portionInfo : changes.AppendedPortions) { - out << portionInfo; - } - out << "; "; - } - if (ui32 moved = changes.PortionsToMove.size()) { - out << "move " << moved << " portions"; - for (auto& [portionInfo, granule] : changes.PortionsToMove) { - out << portionInfo << " (to " << granule << ")"; - } - out << "; "; - } - if (ui32 evicted = changes.PortionsToEvict.size()) { - out << "evict " << evicted << " portions"; - for (auto& [portionInfo, tier] : changes.PortionsToEvict) { - out << portionInfo << " (to " << tier << ")"; - } - out << "; "; - } - if (ui32 dropped = changes.PortionsToDrop.size()) { - out << "drop " << dropped << " portions"; - for (auto& portionInfo : changes.PortionsToDrop) { - out << portionInfo; - } - } - return out; - } }; struct TSelectInfo { diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 5b5af6b9461..60da8ba39b0 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -824,7 +824,7 @@ std::shared_ptr<TColumnEngineChanges> TColumnEngineForLogs::StartTtl(const THash keep = true; if (allowEviction && info.TierName != border.TierName) { evicttionSize += info.BlobsSizes().first; - changes->PortionsToEvict.emplace_back(info, border.TierName); + changes->PortionsToEvict.emplace_back(info, TPortionEvictionFeatures(border.TierName, pathId)); } break; } @@ -1960,16 +1960,16 @@ TVector<TString> TColumnEngineForLogs::EvictBlobs(const TIndexInfo& indexInfo, Y_VERIFY(changes->EvictedRecords.empty()); // dst meta TVector<TString> newBlobs; - TVector<std::pair<TPortionInfo, TString>> evicted; + TVector<std::pair<TPortionInfo, TPortionEvictionFeatures>> evicted; evicted.reserve(changes->PortionsToEvict.size()); - for (auto& [portionInfo, tierName] : changes->PortionsToEvict) { + for (auto& [portionInfo, evFeatures] : changes->PortionsToEvict) { Y_VERIFY(!portionInfo.Empty()); Y_VERIFY(portionInfo.IsActive()); - if (UpdateEvictedPortion(portionInfo, indexInfo, tierName, changes->Blobs, changes->EvictedRecords, newBlobs)) { - Y_VERIFY(portionInfo.TierName == tierName); - evicted.emplace_back(std::move(portionInfo), TString{}); + if (UpdateEvictedPortion(portionInfo, indexInfo, evFeatures.TargetTierName, changes->Blobs, changes->EvictedRecords, newBlobs)) { + Y_VERIFY(portionInfo.TierName == evFeatures.TargetTierName); + evicted.emplace_back(std::move(portionInfo), evFeatures); } } diff --git a/ydb/core/tx/columnshard/engines/portion_info.h b/ydb/core/tx/columnshard/engines/portion_info.h index fa35d36530b..dcb45573c53 100644 --- a/ydb/core/tx/columnshard/engines/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portion_info.h @@ -54,7 +54,6 @@ struct TPortionInfo { bool Valid() const { return !Empty() && Meta.Produced != TPortionMeta::UNSPECIFIED && HasMinMax(FirstPkColumn); } bool IsInserted() const { return Meta.Produced == TPortionMeta::INSERTED; } bool CanHaveDups() const { return !Valid() || IsInserted(); } - ui32 NumRecords() const { return Records.size(); } ui64 Portion() const { diff --git a/ydb/core/tx/columnshard/export_actor.cpp b/ydb/core/tx/columnshard/export_actor.cpp index bba0020fe3b..237819b98ee 100644 --- a/ydb/core/tx/columnshard/export_actor.cpp +++ b/ydb/core/tx/columnshard/export_actor.cpp @@ -48,7 +48,11 @@ public: BlobsToRead.erase(blobId); Y_VERIFY(Event); - Event->Blobs[blobId].Data = blobData; + { + auto it = Event->Blobs.find(blobId); + Y_VERIFY(it != Event->Blobs.end()); + it->second.Data = blobData; + } if (BlobsToRead.empty()) { SendResultAndDie(ctx); diff --git a/ydb/core/tx/tiering/s3_actor.cpp b/ydb/core/tx/tiering/s3_actor.cpp index f0fc7d80973..d9136104748 100644 --- a/ydb/core/tx/tiering/s3_actor.cpp +++ b/ydb/core/tx/tiering/s3_actor.cpp @@ -32,8 +32,8 @@ public: return Event->Blobs; } - TUnifiedBlobId AddExported(const TString& bucket, const TUnifiedBlobId& srcBlob) { - Event->SrcToDstBlobs[srcBlob] = TUnifiedBlobId(srcBlob, TUnifiedBlobId::S3_BLOB, bucket); + TUnifiedBlobId AddExported(const TString& bucket, const TUnifiedBlobId& srcBlob, const ui64 pathId) { + Event->SrcToDstBlobs[srcBlob] = TUnifiedBlobId(srcBlob, TUnifiedBlobId::S3_BLOB, bucket, pathId); return Event->SrcToDstBlobs[srcBlob]; } @@ -119,7 +119,7 @@ public: auto& ex = Exports[exportNo]; for (auto& [blobId, blob] : ex.Blobs()) { - TString key = ex.AddExported(Bucket, blobId).GetS3Key(); + TString key = ex.AddExported(Bucket, blobId, blob.PathId).GetS3Key(); Y_VERIFY(!ExportingKeys.count(key)); // TODO ex.RegisterKey(key); |