about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Alexey Borzenkov <snaury@yandex-team.ru> 2022-02-08 13:40:28 +0300
committer Alexey Borzenkov <snaury@yandex-team.ru> 2022-02-08 13:40:28 +0300
commit a11ed8f0f548d88edbfba2f5b2da15bbc7e52c1d (patch)
tree 378958cf58c7a77be0766f7af05c54204b0aaceb
parent f85b4d7136edfe6eddb23f9c095e035889108265 (diff)
download ydb-a11ed8f0f548d88edbfba2f5b2da15bbc7e52c1d.tar.gz
Include hidden data within main rows in mvcc garbage histograms, KIKIMR-14257
ref:7023eaf186742e15113b1decdfe923853c624e4e
-rw-r--r-- ydb/core/tablet_flat/flat_comp_gen.cpp 85
-rw-r--r-- ydb/core/tablet_flat/flat_comp_gen.h 5
-rw-r--r-- ydb/core/tablet_flat/flat_part_writer.h 15
3 files changed, 77 insertions, 28 deletions
diff --git a/ydb/core/tablet_flat/flat_comp_gen.cpp b/ydb/core/tablet_flat/flat_comp_gen.cpp
index b3e0c63715..43bf95357b 100644
--- a/ydb/core/tablet_flat/flat_comp_gen.cpp
+++ b/ydb/core/tablet_flat/flat_comp_gen.cpp
@@ -89,7 +89,7 @@ struct TGenCompactionStrategy::TExtraState {
}
};
-void TGenCompactionStrategy::TGeneration::PushFront(TPartView partView) noexcept {
+TGenCompactionStrategy::TPartInfo& TGenCompactionStrategy::TGeneration::PushFront(TPartView partView) noexcept {
Y_VERIFY(TakenHeadParts == 0,
"Attempting to prepend part to generation that has taken head parts");
@@ -101,9 +101,10 @@ void TGenCompactionStrategy::TGeneration::PushFront(TPartView partView) noexcept
auto& front = Parts.emplace_front(std::move(partView));
Stats += front.Stats;
StatsPerTablet[front.Label.TabletID()] += front.Stats;
+ return front;
}
-void TGenCompactionStrategy::TGeneration::PushBack(TPartView partView) noexcept {
+TGenCompactionStrategy::TPartInfo& TGenCompactionStrategy::TGeneration::PushBack(TPartView partView) noexcept {
Y_VERIFY(CompactingTailParts == 0,
"Attempting to append part to generation that has compacting tail parts");
@@ -119,6 +120,7 @@ void TGenCompactionStrategy::TGeneration::PushBack(TPartView partView) noexcept
auto& back = Parts.emplace_back(std::move(partView));
Stats += back.Stats;
StatsPerTablet[back.Label.TabletID()] += back.Stats;
+ return back;
}
void TGenCompactionStrategy::TGeneration::PopFront() noexcept {
@@ -200,6 +202,11 @@ void TGenCompactionStrategy::Start(TCompactionState state) {
Policy = scheme->CompactionPolicy;
Generations.resize(Policy->Generations.size());
+ // Reset garbage version to the minimum
+ // It will be recalculated in UpdateStats below anyway
+ CachedGarbageRowVersion = TRowVersion::Min();
+ CachedGarbageBytes = 0;
+
for (auto& partView : Backend->TableParts(Table)) {
auto label = partView->Label;
ui32 level = state.PartLevels.Value(partView->Label, 255);
@@ -482,6 +489,7 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished(
Y_VERIFY(!FinalParts.empty());
auto& front = FinalParts.front();
Y_VERIFY(front.Label == (*partIt)->Label);
+ CachedGarbageBytes -= front.GarbageBytes;
KnownParts.erase(front.Label);
FinalParts.pop_front();
--FinalCompactionTaken;
@@ -505,6 +513,7 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished(
Y_VERIFY(!nextGen.Parts.empty());
auto& front = nextGen.Parts.front();
Y_VERIFY(front.Label == (*partIt)->Label);
+ CachedGarbageBytes -= front.GarbageBytes;
KnownParts.erase(front.Label);
nextGen.PopFront();
}
@@ -555,6 +564,7 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished(
"Failed at gen=%u, sourceIndex=%u, full=%d, headTaken=%lu",
generation, sourceIndex, fullCompaction, sourceGen.TakenHeadParts);
Y_VERIFY(sourceGen.CompactingTailParts > 0);
+ CachedGarbageBytes -= part.GarbageBytes;
KnownParts.erase(part.Label);
sourceGen.PopBack();
sourceParts.pop_back();
@@ -633,7 +643,11 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished(
if (target == Generations.size()) {
for (auto it = newParts.rbegin(); it != newParts.rend(); ++it) {
auto& partView = *it;
- FinalParts.emplace_front(std::move(partView));
+ auto& front = FinalParts.emplace_front(std::move(partView));
+ if (CachedGarbageRowVersion && front.PartView->GarbageStats) {
+ front.GarbageBytes = front.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion);
+ CachedGarbageBytes += front.GarbageBytes;
+ }
}
} else {
auto& newGen = Generations[target];
@@ -641,13 +655,21 @@ TCompactionChanges TGenCompactionStrategy::CompactionFinished(
Y_VERIFY(!newGen.Parts || result->Epoch <= newGen.Parts.back().Epoch);
for (auto it = newParts.begin(); it != newParts.end(); ++it) {
auto& partView = *it;
- newGen.PushBack(std::move(partView));
+ auto& back = newGen.PushBack(std::move(partView));
+ if (CachedGarbageRowVersion && back.PartView->GarbageStats) {
+ back.GarbageBytes = back.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion);
+ CachedGarbageBytes += back.GarbageBytes;
+ }
}
} else {
Y_VERIFY(!newGen.Parts || result->Epoch >= newGen.Parts.front().Epoch);
for (auto it = newParts.rbegin(); it != newParts.rend(); ++it) {
auto& partView = *it;
- newGen.PushFront(std::move(partView));
+ auto& front = newGen.PushFront(std::move(partView));
+ if (CachedGarbageRowVersion && front.PartView->GarbageStats) {
+ front.GarbageBytes = front.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion);
+ CachedGarbageBytes += front.GarbageBytes;
+ }
}
}
@@ -765,6 +787,7 @@ void TGenCompactionStrategy::PartMerged(TPartView partView, ui32 level) {
if (it->Label == label) {
Stats -= it->Stats;
StatsPerTablet[label.TabletID()] -= it->Stats;
+ CachedGarbageBytes -= it->GarbageBytes;
KnownParts.erase(it->Label);
FinalParts.erase(it);
break;
@@ -776,9 +799,13 @@ void TGenCompactionStrategy::PartMerged(TPartView partView, ui32 level) {
}
KnownParts[label] = level;
- FinalParts.emplace_back(std::move(partView));
- Stats += FinalParts.back().Stats;
- StatsPerTablet[FinalParts.back().Label.TabletID()] += FinalParts.back().Stats;
+ auto& back = FinalParts.emplace_back(std::move(partView));
+ Stats += back.Stats;
+ StatsPerTablet[back.Label.TabletID()] += back.Stats;
+ if (CachedGarbageRowVersion && back.PartView->GarbageStats) {
+ back.GarbageBytes = back.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion);
+ CachedGarbageBytes += back.GarbageBytes;
+ }
}
void TGenCompactionStrategy::PartMerged(TIntrusiveConstPtr<TColdPart> part, ui32 level) {
@@ -1418,33 +1445,39 @@ void TGenCompactionStrategy::UpdateStats() {
StatsPerTablet[part.Label.TabletID()] += part.Stats;
}
- CachedGarbageRowVersion = TRowVersion::Min();
- CachedGarbageBytes = 0;
- CachedDroppedBytesPercent = 0;
-
if (const auto& ranges = Backend->TableRemovedRowVersions(Table)) {
auto it = ranges.begin();
if (it->Lower.IsMin()) {
- // TODO: we may want to cache it more efficiently
- CachedGarbageRowVersion = it->Upper;
- auto process = [&](const TPartInfo& part) {
- if (part.PartView->GarbageStats) {
- CachedGarbageBytes += part.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion);
+ // We keep garbage bytes up to date, but when the version
+ // changes we need to recalculate it for all parts
+ if (CachedGarbageRowVersion != it->Upper) {
+ CachedGarbageRowVersion = it->Upper;
+ CachedGarbageBytes = 0;
+ auto process = [&](TPartInfo& part) {
+ if (CachedGarbageRowVersion && part.PartView->GarbageStats) {
+ part.GarbageBytes = part.PartView->GarbageStats->GetGarbageBytes(CachedGarbageRowVersion);
+ CachedGarbageBytes += part.GarbageBytes;
+ } else {
+ part.GarbageBytes = 0;
+ }
+ };
+ for (auto& gen : Generations) {
+ for (auto& part : gen.Parts) {
+ process(part);
+ }
}
- };
- for (const auto& gen : Generations) {
- for (const auto& part : gen.Parts) {
+ for (auto& part : FinalParts) {
process(part);
}
}
- for (const auto& part : FinalParts) {
- process(part);
- }
- if (CachedGarbageBytes > 0 && Stats.BackingSize > 0) {
- CachedDroppedBytesPercent = CachedGarbageBytes * 100 / Stats.BackingSize;
- }
}
}
+
+ if (CachedGarbageBytes > 0 && Stats.BackingSize > 0) {
+ CachedDroppedBytesPercent = CachedGarbageBytes * 100 / Stats.BackingSize;
+ } else {
+ CachedDroppedBytesPercent = 0;
+ }
}
void TGenCompactionStrategy::UpdateOverload() {
diff --git a/ydb/core/tablet_flat/flat_comp_gen.h b/ydb/core/tablet_flat/flat_comp_gen.h
index 642a90e292..fc38fe705e 100644
--- a/ydb/core/tablet_flat/flat_comp_gen.h
+++ b/ydb/core/tablet_flat/flat_comp_gen.h
@@ -158,6 +158,7 @@ namespace NCompGen {
const TLogoBlobID Label;
const TEpoch Epoch;
const TStats Stats;
+ ui64 GarbageBytes = 0;
inline bool operator<(const TPartInfo& other) const {
if (other.Epoch != Epoch) {
@@ -191,8 +192,8 @@ namespace NCompGen {
THashMap<ui64, TStats> StatsPerTablet;
float OverloadFactor = 0.0;
- void PushFront(TPartView partView) noexcept;
- void PushBack(TPartView partView) noexcept;
+ TPartInfo& PushFront(TPartView partView) noexcept;
+ TPartInfo& PushBack(TPartView partView) noexcept;
void PopFront() noexcept;
void PopBack() noexcept;
};
diff --git a/ydb/core/tablet_flat/flat_part_writer.h b/ydb/core/tablet_flat/flat_part_writer.h
index 0355bd9b96..4e04732cd9 100644
--- a/ydb/core/tablet_flat/flat_part_writer.h
+++ b/ydb/core/tablet_flat/flat_part_writer.h
@@ -245,12 +245,19 @@ namespace NTable {
Y_VERIFY_DEBUG(minVersion < maxVersion);
+ ui64 overheadBytes = 0;
for (size_t groupIdx : xrange(Groups.size())) {
auto& g = Groups[groupIdx];
// N.B. non-main groups have no key
TCellsRef groupKey = groupIdx == 0 ? KeyState.Key : TCellsRef{ };
g.NextDataSize = g.Data.CalcSize(groupKey, row, KeyState.Final, minVersion, maxVersion, /* txId */ 0);
g.NextIndexSize = g.Index.CalcSize(groupKey);
+
+ overheadBytes += (
+ g.NextDataSize.DataPageSize +
+ g.NextDataSize.SmallSize +
+ g.NextDataSize.LargeSize +
+ g.NextIndexSize);
}
if (KeyState.WrittenDeltas == 0 && NeedFlush()) {
@@ -294,6 +301,14 @@ namespace NTable {
}
FinishMainKey();
+
+ if (maxVersion < TRowVersion::Max()) {
+ // Count overhead bytes if everything up to maxVersion is removed
+ Current.GarbageStatsBuilder.Add(maxVersion, overheadBytes);
+ if (Current.GarbageStatsBuilder.Size() > GarbageStatsMaxBuildSize) {
+ Current.GarbageStatsBuilder.ShrinkTo(GarbageStatsMaxSize);
+ }
+ }
}
void FinishMainKey() noexcept