diff options
author | Alexey Borzenkov <snaury@yandex-team.ru> | 2022-02-08 13:40:28 +0300 |
---|---|---|
committer | Alexey Borzenkov <snaury@yandex-team.ru> | 2022-02-08 13:40:28 +0300 |
commit | a11ed8f0f548d88edbfba2f5b2da15bbc7e52c1d (patch) | |
tree | 378958cf58c7a77be0766f7af05c54204b0aaceb | |
parent | f85b4d7136edfe6eddb23f9c095e035889108265 (diff) | |
download | ydb-a11ed8f0f548d88edbfba2f5b2da15bbc7e52c1d.tar.gz |
Include hidden data within main rows in mvcc garbage histograms, KIKIMR-14257
ref:7023eaf186742e15113b1decdfe923853c624e4e
-rw-r--r-- | ydb/core/tablet_flat/flat_comp_gen.cpp | 85 |
-rw-r--r-- | ydb/core/tablet_flat/flat_comp_gen.h | 5 |
-rw-r--r-- | ydb/core/tablet_flat/flat_part_writer.h | 15 |
3 files changed, 77 insertions, 28 deletions
diff --git a/ydb/core/tablet_flat/flat_comp_gen.cpp b/ydb/core/tablet_flat/flat_comp_gen.cpp index b3e0c63715..43bf95357b 100644 --- a/ydb/core/tablet_flat/flat_comp_gen.cpp +++ b/ydb/core/tablet_flat/flat_comp_gen.cpp @@ -89,7 +89,7 @@ struct TGenCompactionStrategy::TExtraState { } }; -void TGenCompactionStrategy::TGeneration::PushFront(TPartView partView) noexcept { +TGenCompactionStrategy::TPartInfo& TGenCompactionStrategy::TGeneration::PushFront(TPartView partView) noexcept { Y_VERIFY(TakenHeadParts == 0, "Attempting to prepend part to generation that has taken head parts"); @@ -101,9 +101,10 @@ void TGenCompactionStrategy::TGeneration::PushFront(TPartView partView) noexcept auto& front = Parts.emplace_front(std::move(partView)); Stats += front.Stats; StatsPerTablet[front.Label.TabletID()] += front.Stats; + return front; } -void TGenCompactionStrategy::TGeneration::PushBack(TPartView partView) noexcept { +TGenCompactionStrategy::TPartInfo& TGenCompactionStrategy::TGeneration::PushBack(TPartView partView) noexcept { Y_VERIFY(CompactingTailParts == 0, "Attempting to append part to generation that has compacting tail parts"); @@ -119,6 +120,7 @@ void TGenCompactionStrategy::TGeneration::PushBack(TPartView partView) noexcept auto& back = Parts.emplace_back(std::move(partView)); Stats += back.Stats; StatsPerTablet[back.Label.TabletID()] += back.Stats; + return back; } void TGenCompactionStrategy::TGeneration::PopFront() noexcept { @@ -200,6 +202,11 @@ void TGenCompactionStrategy::Start(TCompactionState state) { Policy = scheme->CompactionPolicy; Generations.resize(Policy->Generations.size()); + // Reset garbage version to the minimum + // It will be recalculated in UpdateStats below anyway + CachedGarbageRowVersion = TRowVersion::Min(); + CachedGarbageBytes = 0; + for (auto& partView : Backend->TableParts(Table)) { auto label = partView->Label; ui32 level = state.PartLevels.Value(partView->Label, 255); @@ -482,6 +489,7 @@ TCompactionChanges 
TGenCompactionStrategy::CompactionFinished( Y_VERIFY(!FinalParts.empty()); auto& front = FinalParts.front(); Y_VERIFY(front.Label == (*partIt)->Label); + CachedGarbageBytes -= front.GarbageBytes; KnownParts.erase(front.Label); FinalParts.pop_front(); --FinalCompactionTaken; @@ -505,6 +513,7 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished( Y_VERIFY(!nextGen.Parts.empty()); auto& front = nextGen.Parts.front(); Y_VERIFY(front.Label == (*partIt)->Label); + CachedGarbageBytes -= front.GarbageBytes; KnownParts.erase(front.Label); nextGen.PopFront(); } @@ -555,6 +564,7 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished( "Failed at gen=%u, sourceIndex=%u, full=%d, headTaken=%lu", generation, sourceIndex, fullCompaction, sourceGen.TakenHeadParts); Y_VERIFY(sourceGen.CompactingTailParts > 0); + CachedGarbageBytes -= part.GarbageBytes; KnownParts.erase(part.Label); sourceGen.PopBack(); sourceParts.pop_back(); @@ -633,7 +643,11 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished( if (target == Generations.size()) { for (auto it = newParts.rbegin(); it != newParts.rend(); ++it) { auto& partView = *it; - FinalParts.emplace_front(std::move(partView)); + auto& front = FinalParts.emplace_front(std::move(partView)); + if (CachedGarbageRowVersion && front.PartView->GarbageStats) { + front.GarbageBytes = front.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion); + CachedGarbageBytes += front.GarbageBytes; + } } } else { auto& newGen = Generations[target]; @@ -641,13 +655,21 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished( Y_VERIFY(!newGen.Parts || result->Epoch <= newGen.Parts.back().Epoch); for (auto it = newParts.begin(); it != newParts.end(); ++it) { auto& partView = *it; - newGen.PushBack(std::move(partView)); + auto& back = newGen.PushBack(std::move(partView)); + if (CachedGarbageRowVersion && back.PartView->GarbageStats) { + back.GarbageBytes = 
back.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion); + CachedGarbageBytes += back.GarbageBytes; + } } } else { Y_VERIFY(!newGen.Parts || result->Epoch >= newGen.Parts.front().Epoch); for (auto it = newParts.rbegin(); it != newParts.rend(); ++it) { auto& partView = *it; - newGen.PushFront(std::move(partView)); + auto& front = newGen.PushFront(std::move(partView)); + if (CachedGarbageRowVersion && front.PartView->GarbageStats) { + front.GarbageBytes = front.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion); + CachedGarbageBytes += front.GarbageBytes; + } } } @@ -765,6 +787,7 @@ void TGenCompactionStrategy::PartMerged(TPartView partView, ui32 level) { if (it->Label == label) { Stats -= it->Stats; StatsPerTablet[label.TabletID()] -= it->Stats; + CachedGarbageBytes -= it->GarbageBytes; KnownParts.erase(it->Label); FinalParts.erase(it); break; @@ -776,9 +799,13 @@ void TGenCompactionStrategy::PartMerged(TPartView partView, ui32 level) { } KnownParts[label] = level; - FinalParts.emplace_back(std::move(partView)); - Stats += FinalParts.back().Stats; - StatsPerTablet[FinalParts.back().Label.TabletID()] += FinalParts.back().Stats; + auto& back = FinalParts.emplace_back(std::move(partView)); + Stats += back.Stats; + StatsPerTablet[back.Label.TabletID()] += back.Stats; + if (CachedGarbageRowVersion && back.PartView->GarbageStats) { + back.GarbageBytes = back.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion); + CachedGarbageBytes += back.GarbageBytes; + } } void TGenCompactionStrategy::PartMerged(TIntrusiveConstPtr<TColdPart> part, ui32 level) { @@ -1418,33 +1445,39 @@ void TGenCompactionStrategy::UpdateStats() { StatsPerTablet[part.Label.TabletID()] += part.Stats; } - CachedGarbageRowVersion = TRowVersion::Min(); - CachedGarbageBytes = 0; - CachedDroppedBytesPercent = 0; - if (const auto& ranges = Backend->TableRemovedRowVersions(Table)) { auto it = ranges.begin(); if (it->Lower.IsMin()) { - // TODO: we may want to cache it more 
efficiently - CachedGarbageRowVersion = it->Upper; - auto process = [&](const TPartInfo& part) { - if (part.PartView->GarbageStats) { - CachedGarbageBytes += part.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion); + // We keep garbage bytes up to date, but when the version + // changes we need to recalculate it for all parts + if (CachedGarbageRowVersion != it->Upper) { + CachedGarbageRowVersion = it->Upper; + CachedGarbageBytes = 0; + auto process = [&](TPartInfo& part) { + if (CachedGarbageRowVersion && part.PartView->GarbageStats) { + part.GarbageBytes = part.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion); + CachedGarbageBytes += part.GarbageBytes; + } else { + part.GarbageBytes = 0; + } + }; + for (auto& gen : Generations) { + for (auto& part : gen.Parts) { + process(part); + } } - }; - for (const auto& gen : Generations) { - for (const auto& part : gen.Parts) { + for (auto& part : FinalParts) { process(part); } } - for (const auto& part : FinalParts) { - process(part); - } - if (CachedGarbageBytes > 0 && Stats.BackingSize > 0) { - CachedDroppedBytesPercent = CachedGarbageBytes * 100 / Stats.BackingSize; - } } } + + if (CachedGarbageBytes > 0 && Stats.BackingSize > 0) { + CachedDroppedBytesPercent = CachedGarbageBytes * 100 / Stats.BackingSize; + } else { + CachedDroppedBytesPercent = 0; + } } void TGenCompactionStrategy::UpdateOverload() { diff --git a/ydb/core/tablet_flat/flat_comp_gen.h b/ydb/core/tablet_flat/flat_comp_gen.h index 642a90e292..fc38fe705e 100644 --- a/ydb/core/tablet_flat/flat_comp_gen.h +++ b/ydb/core/tablet_flat/flat_comp_gen.h @@ -158,6 +158,7 @@ namespace NCompGen { const TLogoBlobID Label; const TEpoch Epoch; const TStats Stats; + ui64 GarbageBytes = 0; inline bool operator<(const TPartInfo& other) const { if (other.Epoch != Epoch) { @@ -191,8 +192,8 @@ namespace NCompGen { THashMap<ui64, TStats> StatsPerTablet; float OverloadFactor = 0.0; - void PushFront(TPartView partView) noexcept; - void 
PushBack(TPartView partView) noexcept; + TPartInfo& PushFront(TPartView partView) noexcept; + TPartInfo& PushBack(TPartView partView) noexcept; void PopFront() noexcept; void PopBack() noexcept; }; diff --git a/ydb/core/tablet_flat/flat_part_writer.h b/ydb/core/tablet_flat/flat_part_writer.h index 0355bd9b96..4e04732cd9 100644 --- a/ydb/core/tablet_flat/flat_part_writer.h +++ b/ydb/core/tablet_flat/flat_part_writer.h @@ -245,12 +245,19 @@ namespace NTable { Y_VERIFY_DEBUG(minVersion < maxVersion); + ui64 overheadBytes = 0; for (size_t groupIdx : xrange(Groups.size())) { auto& g = Groups[groupIdx]; // N.B. non-main groups have no key TCellsRef groupKey = groupIdx == 0 ? KeyState.Key : TCellsRef{ }; g.NextDataSize = g.Data.CalcSize(groupKey, row, KeyState.Final, minVersion, maxVersion, /* txId */ 0); g.NextIndexSize = g.Index.CalcSize(groupKey); + + overheadBytes += ( + g.NextDataSize.DataPageSize + + g.NextDataSize.SmallSize + + g.NextDataSize.LargeSize + + g.NextIndexSize); } if (KeyState.WrittenDeltas == 0 && NeedFlush()) { @@ -294,6 +301,14 @@ namespace NTable { } FinishMainKey(); + + if (maxVersion < TRowVersion::Max()) { + // Count overhead bytes if everything up to maxVersion is removed + Current.GarbageStatsBuilder.Add(maxVersion, overheadBytes); + if (Current.GarbageStatsBuilder.Size() > GarbageStatsMaxBuildSize) { + Current.GarbageStatsBuilder.ShrinkTo(GarbageStatsMaxSize); + } + } } void FinishMainKey() noexcept |