diff options
author | Maksim Kita <kitaetoya@gmail.com> | 2023-06-28 10:17:08 +0000 |
---|---|---|
committer | maksim-kita <maksim-kita@yandex-team.com> | 2023-06-28 13:17:08 +0300 |
commit | e8666c117b945b60f708bab1fa89f4e5e946c80c (patch) | |
tree | cb5b308d5fc8a0694f615cbf33518068aaf0df4a | |
parent | eb92c027b4d7b9c94509d0b466f77e51ef3f6b6a (diff) | |
download | ydb-e8666c117b945b60f708bab1fa89f4e5e946c80c.tar.gz |
KeyAccessSample refactoring
KeyAccessSample refactoring
Pull Request resolved: #276
-rw-r--r-- | ydb/core/tablet_flat/flat_stat_table.h | 42 |
1 file changed, 17 insertions, 25 deletions
diff --git a/ydb/core/tablet_flat/flat_stat_table.h b/ydb/core/tablet_flat/flat_stat_table.h
index a8d1af2c17..73e1504b3b 100644
--- a/ydb/core/tablet_flat/flat_stat_table.h
+++ b/ydb/core/tablet_flat/flat_stat_table.h
@@ -26,7 +26,7 @@ public:
     }
 
     /**
-     * @return true when we haven't reached the end and have current key
+     * @return true when we haven't reached the end and have current key
      * @return false when we have reached the end and don't have current key
      */
     bool Next(TPartDataStats& stats) {
@@ -140,40 +140,32 @@ public:
         ui64 idx = TotalCount;
         ++TotalCount;
         if (idx >= SampleCount) {
-            idx = RandomNumber<ui64>(TotalCount) ;
+            idx = RandomNumber<ui64>(TotalCount);
         }
 
         if (idx >= SampleCount) {
             return;
         }
 
-        TSerializedCellVec saved(TSerializedCellVec::Serialize(key));
+        TString serializedKey = TSerializedCellVec::Serialize(key);
+        ++KeyRefCount[serializedKey];
 
-        auto it = KeyRefCount.find(saved.GetBuffer());
-        if (it != KeyRefCount.end()) {
-            // Add a reference for existing key
-            saved = it->second.first;
-            ++it->second.second;
-        } else {
-            KeyRefCount[saved.GetBuffer()] = std::make_pair(saved, 1);
+        if (Sample.size() < SampleCount) {
+            Sample.emplace_back(std::make_pair(serializedKey, accessKind));
+            return;
         }
 
-        if (Sample.size() < SampleCount) {
-            Sample.emplace_back(std::make_pair(saved.GetBuffer(), accessKind));
-        } else {
-            TString old = Sample[idx].first;
-            auto oit = KeyRefCount.find(old);
-            Y_VERIFY(oit != KeyRefCount.end());
-
-            // Delete the key if this was the last reference
-            if (oit->second.second == 1) {
-                KeyRefCount.erase(oit);
-            } else {
-                --oit->second.second;
-            }
+        TString old = Sample[idx].first;
+        auto oit = KeyRefCount.find(old);
+        Y_VERIFY(oit != KeyRefCount.end());
+        --oit->second;
 
-            Sample[idx] = std::make_pair(saved.GetBuffer(), accessKind);
+        // Delete the key if this was the last reference
+        if (oit->second == 0) {
+            KeyRefCount.erase(oit);
         }
+
+        Sample[idx] = std::make_pair(serializedKey, accessKind);
     }
 
     const TSample& GetSample() const {
@@ -191,7 +183,7 @@ private:
     const ui64 SampleCount;
     ui64 TotalCount;
     // Store only unique keys and their ref counts to save memory
-    THashMap<TString, std::pair<TSerializedCellVec, ui64>> KeyRefCount;
+    THashMap<TString, ui64> KeyRefCount;
 };
 
 void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, const IPages* env);