diff options
author | kungasc <kungasc@yandex-team.com> | 2023-09-25 15:02:10 +0300 |
---|---|---|
committer | kungasc <kungasc@yandex-team.com> | 2023-09-25 17:03:48 +0300 |
commit | 287b4bb4d2239bb7555407d03950d323ebf0edce (patch) | |
tree | ce9333f95905e96059546302bfce9ed2fd05159f | |
parent | 0abd9ccd5284f92095881b3350084c192fa93ddc (diff) | |
download | ydb-287b4bb4d2239bb7555407d03950d323ebf0edce.tar.gz |
KIKIMR-19139 Load index in BuildStats
-rw-r--r-- | ydb/core/tablet_flat/flat_part_index_iter.h | 14 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_stat_part.h | 87 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_stat_table.cpp | 26 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_stat_table.h | 27 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_table_part_ut.cpp | 33 | ||||
-rw-r--r-- | ydb/core/tablet_flat/test/libs/table/test_make.h | 32 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/ut_stat.cpp | 104 | ||||
-rw-r--r-- | ydb/core/tablet_flat/ut/ya.make | 1 | ||||
-rw-r--r-- | ydb/core/tx/datashard/datashard__stats.cpp | 166 | ||||
-rw-r--r-- | ydb/core/tx/datashard/datashard_ut_stats.cpp | 1 |
14 files changed, 408 insertions, 87 deletions
diff --git a/ydb/core/tablet_flat/flat_part_index_iter.h b/ydb/core/tablet_flat/flat_part_index_iter.h index f4a11e19ac..f5761a88c2 100644 --- a/ydb/core/tablet_flat/flat_part_index_iter.h +++ b/ydb/core/tablet_flat/flat_part_index_iter.h @@ -89,7 +89,7 @@ public: return DataOrGone(); } - bool IsValid() { + bool IsValid() const { return bool(Iter); } @@ -99,23 +99,23 @@ public: } public: - TRowId GetEndRowId() { + TRowId GetEndRowId() const { return EndRowId; } - TPageId GetPageId() { + TPageId GetPageId() const { Y_VERIFY(Index); Y_VERIFY(Iter); return Iter->GetPageId(); } - TRowId GetRowId() { + TRowId GetRowId() const { Y_VERIFY(Index); Y_VERIFY(Iter); return Iter->GetRowId(); } - TRowId GetNextRowId() { + TRowId GetNextRowId() const { Y_VERIFY(Index); auto next = Iter + 1; return next @@ -123,14 +123,14 @@ public: : Max<TRowId>(); } - const TRecord * GetRecord() { + const TRecord * GetRecord() const { Y_VERIFY(Index); Y_VERIFY(Iter); return Iter.GetRecord(); } private: - EReady DataOrGone() { + EReady DataOrGone() const { return Iter ? EReady::Data : EReady::Gone; } diff --git a/ydb/core/tablet_flat/flat_stat_part.h b/ydb/core/tablet_flat/flat_stat_part.h index 5461528db5..212101611f 100644 --- a/ydb/core/tablet_flat/flat_stat_part.h +++ b/ydb/core/tablet_flat/flat_stat_part.h @@ -1,6 +1,7 @@ #pragma once #include "flat_part_iface.h" +#include "flat_part_index_iter.h" #include "flat_part_laid.h" #include "flat_page_frames.h" #include "util_basics.h" @@ -118,47 +119,69 @@ private: // if page start key is not screened then the whole previous page is added to stats class TScreenedPartIndexIterator { public: - TScreenedPartIndexIterator(TPartView partView, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns, + TScreenedPartIndexIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns, TIntrusiveConstPtr<NPage::TFrames> small, TIntrusiveConstPtr<NPage::TFrames> large) : Part(std::move(partView.Part)) + , Pos(Part.Get(), env, {}) , KeyColumns(std::move(keyColumns)) , Screen(std::move(partView.Screen)) , Small(std::move(small)) , Large(std::move(large)) , CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0, 1)) { - Pos = Part->Index->Begin(); - End = Part->Index->End(); AltGroups.reserve(Part->GroupsCount - 1); for (ui32 group : xrange(size_t(1), Part->GroupsCount)) { - AltGroups.emplace_back(Part.Get(), NPage::TGroupId(group)); + AltGroups.emplace_back(Part.Get(), env, NPage::TGroupId(group)); } - for (ui32 group : xrange(Part->HistoricGroupsCount)) { - HistoryGroups.emplace_back(Part.Get(), NPage::TGroupId(group, true)); + for (ui32 group : xrange(Part->HistoricIndexes.size())) { + HistoryGroups.emplace_back(Part.Get(), env, NPage::TGroupId(group, true)); } + } + + EReady Start() { + auto ready = Pos.Seek(0); FillKey(); + + for (auto& g : AltGroups) { + if (g.Pos.Seek(0) == EReady::Page) { + ready = EReady::Page; + } + } + for (auto& g : HistoryGroups) { + if (g.Pos.Seek(0) == EReady::Page) { + ready = EReady::Page; + } + } + + return ready; } bool IsValid() const { - return Pos != End; + return Pos.IsValid(); } - void Next(TPartDataStats& stats) { + EReady Next(TPartDataStats& stats) { Y_VERIFY(IsValid()); - auto curPageId = Pos->GetPageId(); - LastRowId = Pos->GetRowId(); - ++Pos; + auto curPageId = Pos.GetPageId(); + LastRowId = Pos.GetRowId(); + auto ready = Pos.Next(); + if (ready == EReady::Page) { + return ready; + } ui64 rowCount = IncludedRows(GetLastRowId(), GetCurrentRowId()); stats.RowCount += rowCount; if (rowCount) AddPageSize(stats.DataSize, curPageId); - TRowId nextRowId = Pos ? Pos->GetRowId() : Max<TRowId>(); + TRowId nextRowId = ready == EReady::Data ? Pos.GetRowId() : Max<TRowId>(); for (auto& g : AltGroups) { - while (g.Pos && g.Pos->GetRowId() < nextRowId) { + while (g.Pos.IsValid() && g.Pos.GetRowId() < nextRowId) { // eagerly include all data up to the next row id - if (rowCount) AddPageSize(stats.DataSize, g.Pos->GetPageId(), g.GroupId); - ++g.Pos; + if (rowCount) AddPageSize(stats.DataSize, g.Pos.GetPageId(), g.GroupId); + if (g.Pos.Next() == EReady::Page) { + ready = EReady::Page; + break; + } } } @@ -167,18 +190,24 @@ public: auto& h = HistoryGroups[0]; const auto& hscheme = Part->Scheme->HistoryGroup; Y_VERIFY_DEBUG(hscheme.ColsKeyIdx.size() == 3); - while (h.Pos && h.Pos->Cell(hscheme.ColsKeyIdx[0]).AsValue<TRowId>() < nextRowId) { + while (h.Pos.IsValid() && h.Pos.GetRecord()->Cell(hscheme.ColsKeyIdx[0]).AsValue<TRowId>() < nextRowId) { // eagerly include all history up to the next row id - if (rowCount) AddPageSize(stats.DataSize, h.Pos->GetPageId(), h.GroupId); - ++h.Pos; + if (rowCount) AddPageSize(stats.DataSize, h.Pos.GetPageId(), h.GroupId); + if (h.Pos.Next() == EReady::Page) { + ready = EReady::Page; + break; + } } - TRowId nextHistoryRowId = h.Pos ? h.Pos->GetRowId() : Max<TRowId>(); + TRowId nextHistoryRowId = h.Pos.IsValid() ? h.Pos.GetRowId() : Max<TRowId>(); for (size_t index = 1; index < HistoryGroups.size(); ++index) { auto& g = HistoryGroups[index]; - while (g.Pos && g.Pos->GetRowId() < nextHistoryRowId) { + while (g.Pos.IsValid() && g.Pos.GetRowId() < nextHistoryRowId) { // eagerly include all data up to the next row id - if (rowCount) AddPageSize(stats.DataSize, g.Pos->GetPageId(), g.GroupId); - ++g.Pos; + if (rowCount) AddPageSize(stats.DataSize, g.Pos.GetPageId(), g.GroupId); + if (g.Pos.Next() == EReady::Page) { + ready = EReady::Page; + break; + } } } } @@ -193,6 +222,7 @@ public: } FillKey(); + return ready; } TDbTupleRef GetCurrentKey() const { @@ -207,7 +237,7 @@ private: ui64 GetCurrentRowId() const { if (IsValid()) { - return Pos->GetRowId(); + return Pos.GetRowId(); } if (TRowId endRowId = Part->Index.GetEndRowId(); endRowId != Max<TRowId>()) { // This would include the last page rows when known @@ -233,7 +263,7 @@ private: ui32 keyIdx = 0; // Add columns that are present in the part for (;keyIdx < Part->Scheme->Groups[0].KeyTypes.size(); ++keyIdx) { - CurrentKey.push_back(Pos->Cell(Part->Scheme->Groups[0].ColsKeyIdx[keyIdx])); + CurrentKey.push_back(Pos.GetRecord()->Cell(Part->Scheme->Groups[0].ColsKeyIdx[keyIdx])); } // Extend with default values if needed @@ -293,20 +323,19 @@ private: private: struct TGroupState { - NPage::TIndex::TIter Pos; + TPartIndexIt Pos; const NPage::TGroupId GroupId; - TGroupState(const TPart* part, NPage::TGroupId groupId) - : Pos(part->GetGroupIndex(groupId)->Begin()) + TGroupState(const TPart* part, IPages* env, NPage::TGroupId groupId) + : Pos(part, env, groupId) , GroupId(groupId) { } }; private: TIntrusiveConstPtr<TPart> Part; + TPartIndexIt Pos; TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns; - NPage::TIndex::TIter Pos; - NPage::TIndex::TIter End; TSmallVec<TCell> CurrentKey; ui64 LastRowId = 0; TSmallVec<TGroupState> AltGroups; diff --git a/ydb/core/tablet_flat/flat_stat_table.cpp b/ydb/core/tablet_flat/flat_stat_table.cpp index 78db00d61b..ff7e3fb1cc 100644 --- a/ydb/core/tablet_flat/flat_stat_table.cpp +++ b/ydb/core/tablet_flat/flat_stat_table.cpp @@ -6,26 +6,38 @@ namespace NKikimr { namespace NTable { -void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, const IPages* env) { - Y_UNUSED(env); - +bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env) { stats.Clear(); TPartDataStats stIterStats = { }; TStatsIterator stIter(subset.Scheme->Keys); // Make index iterators for all parts + bool started = true; for (auto& pi : subset.Flatten) { stats.IndexSize.Add(pi->IndexesRawSize, pi->Label.Channel()); - TAutoPtr<TScreenedPartIndexIterator> iter = new TScreenedPartIndexIterator(pi, subset.Scheme->Keys, pi->Small, pi->Large); - if (iter->IsValid()) { + TAutoPtr<TScreenedPartIndexIterator> iter = new TScreenedPartIndexIterator(pi, env, subset.Scheme->Keys, pi->Small, pi->Large); + auto ready = iter->Start(); + if (ready == EReady::Page) { + started = false; + } else if (ready == EReady::Data) { stIter.Add(iter); } } + if (!started) { + return false; + } ui64 prevRows = 0; ui64 prevSize = 0; - while (stIter.Next(stIterStats)) { + while (true) { + auto ready = stIter.Next(stIterStats); + if (ready == EReady::Page) { + return false; + } else if (ready == EReady::Gone) { + break; + } + const bool nextRowsBucket = (stIterStats.RowCount >= prevRows + rowCountResolution); const bool nextSizeBucket = (stIterStats.DataSize.Size >= prevSize + dataSizeResolution); @@ -48,6 +60,8 @@ void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u stats.RowCount = stIterStats.RowCount; stats.DataSize = std::move(stIterStats.DataSize); + + return true; } void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners) { diff --git a/ydb/core/tablet_flat/flat_stat_table.h b/ydb/core/tablet_flat/flat_stat_table.h index 73e1504b3b..38a71acc9c 100644 --- a/ydb/core/tablet_flat/flat_stat_table.h +++ b/ydb/core/tablet_flat/flat_stat_table.h @@ -25,11 +25,7 @@ public: Heap.push(it); } - /** - * @return true when we haven't reached the end and have current key - * @return false when we have reached the end and don't have current key - */ - bool Next(TPartDataStats& stats) { + EReady Next(TPartDataStats& stats) { ui64 lastRowCount = stats.RowCount; ui64 lastDataSize = stats.DataSize.Size; @@ -41,16 +37,24 @@ public: TSerializedCellVec serialized = TSerializedCellVec(TSerializedCellVec::Serialize({it->GetCurrentKey().Columns, it->GetCurrentKey().ColumnCount})); TDbTupleRef key(KeyColumns->BasicTypes().data(), serialized.GetCells().data(), serialized.GetCells().size()); - if (MoveIterator(it, stats)) + auto ready = it->Next(stats); + if (ready == EReady::Page) { + return ready; + } else if (ready == EReady::Data) { Heap.push(it); + } // guarantees that all results will be different while (!Heap.empty() && CompareKeys(key, Heap.top()->GetCurrentKey()) == 0) { it = Heap.top(); Heap.pop(); - if (MoveIterator(it, stats)) + ready = it->Next(stats); + if (ready == EReady::Page) { + return ready; + } else if (ready == EReady::Data) { Heap.push(it); + } } if (stats.RowCount != lastRowCount && stats.DataSize.Size != lastDataSize) { @@ -58,7 +62,7 @@ public: } } - return !Heap.empty(); + return Heap.empty() ? EReady::Gone : EReady::Data; } TDbTupleRef GetCurrentKey() const { @@ -79,11 +83,6 @@ private: } }; - bool MoveIterator(TScreenedPartIndexIterator* it, TPartDataStats& stats) { - it->Next(stats); - return it->IsValid(); - } - TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns; THolderVector<TScreenedPartIndexIterator> Iterators; TPriorityQueue<TScreenedPartIndexIterator*, TSmallVec<TScreenedPartIndexIterator*>, TIterKeyGreater> Heap; @@ -186,7 +185,7 @@ private: THashMap<TString, ui64> KeyRefCount; }; -void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, const IPages* env); +bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env); void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners); }} diff --git a/ydb/core/tablet_flat/flat_table_part_ut.cpp b/ydb/core/tablet_flat/flat_table_part_ut.cpp index 77ff5388b4..98dd91bff7 100644 --- a/ydb/core/tablet_flat/flat_table_part_ut.cpp +++ b/ydb/core/tablet_flat/flat_table_part_ut.cpp @@ -76,15 +76,17 @@ Y_UNIT_TEST_SUITE(TLegacy) { std::vector<ui64>& sizes) { TPartDataStats stats = { }; + TTestEnv env; // TScreenedPartIndexIterator without screen previously was TPartIndexIterator - TScreenedPartIndexIterator idxIter(TPartView{part, nullptr, nullptr}, scheme->Keys, nullptr, nullptr); + TScreenedPartIndexIterator idxIter(TPartView{part, nullptr, nullptr}, &env, scheme->Keys, nullptr, nullptr); sizes.clear(); + UNIT_ASSERT_VALUES_EQUAL(idxIter.Start(), EReady::Data); while (idxIter.IsValid()) { TDbTupleRef key = idxIter.GetCurrentKey(); dbgOut << DbgPrintTuple(key, typeRegistry) << " " << stats.RowCount << " " << stats.DataSize.Size << Endl; sizes.push_back(stats.DataSize.Size); - idxIter.Next(stats); + UNIT_ASSERT(idxIter.Next(stats) != EReady::Page); } }; @@ -144,13 +146,15 @@ Y_UNIT_TEST_SUITE(TLegacy) { auto fnIterate = [&dbgOut, &typeRegistry] (TIntrusiveConstPtr<TPartStore> part, TIntrusiveConstPtr<TScreen> screen, TIntrusiveConstPtr<TRowScheme> scheme, TIntrusiveConstPtr<NPage::TFrames> frames) -> std::pair<ui64, ui64> { TPartDataStats stats = { }; - TScreenedPartIndexIterator idxIter(TPartView{part, screen, nullptr}, scheme->Keys, std::move(frames), nullptr); + TTestEnv env; + TScreenedPartIndexIterator idxIter(TPartView{part, screen, nullptr}, &env, scheme->Keys, std::move(frames), nullptr); + UNIT_ASSERT_VALUES_EQUAL(idxIter.Start(), EReady::Data); while (idxIter.IsValid()) { TDbTupleRef key = idxIter.GetCurrentKey(); dbgOut << DbgPrintTuple(key, typeRegistry) << " " << stats.RowCount << " " << stats.DataSize.Size << Endl; - idxIter.Next(stats); + UNIT_ASSERT(idxIter.Next(stats) != EReady::Page); } return {stats.RowCount, stats.DataSize.Size}; @@ -301,16 +305,27 @@ Y_UNIT_TEST_SUITE(TLegacy) { }); TPartDataStats stats = { }; + TTestEnv env; TStatsIterator stIter(lay2.RowScheme()->Keys); - stIter.Add(MakeHolder<TScreenedPartIndexIterator>(TPartView{eggs2.At(0), screen2, nullptr}, lay2.RowScheme()->Keys, nullptr, nullptr)); - stIter.Add(MakeHolder<TScreenedPartIndexIterator>(TPartView{eggs1.At(0), screen1, nullptr}, lay2.RowScheme()->Keys, nullptr, nullptr)); - - + { + auto it1 = MakeHolder<TScreenedPartIndexIterator>(TPartView{eggs2.At(0), screen2, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr); + auto it2 = MakeHolder<TScreenedPartIndexIterator>(TPartView{eggs1.At(0), screen1, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr); + UNIT_ASSERT_VALUES_EQUAL(it1->Start(), EReady::Data); + UNIT_ASSERT_VALUES_EQUAL(it2->Start(), EReady::Data); + stIter.Add(std::move(it1)); + stIter.Add(std::move(it2)); + } TSerializedCellVec prevKey; ui64 prevRowCount = 0; ui64 prevDataSize = 0; - while (stIter.Next(stats)) { + while (true) { + auto ready = stIter.Next(stats); + if (ready == EReady::Gone) { + break; + } + UNIT_ASSERT_VALUES_EQUAL(ready, EReady::Data); + TDbTupleRef key = stIter.GetCurrentKey(); dbgOut << DbgPrintTuple(key, typeRegistry) diff --git a/ydb/core/tablet_flat/test/libs/table/test_make.h b/ydb/core/tablet_flat/test/libs/table/test_make.h index 8bd0e14d6d..60853f655f 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_make.h +++ b/ydb/core/tablet_flat/test/libs/table/test_make.h @@ -21,6 +21,7 @@ namespace NTest { struct IBand { virtual ~IBand() = default; virtual void Add(const TRow&) noexcept = 0; + virtual void Ver(TRowVersion rowVersion = TRowVersion::Min()) = 0; }; struct TPart : IBand { @@ -36,6 +37,11 @@ namespace NTest { Cook.Add(row); } + void Ver(TRowVersion rowVersion) override + { + Cook.Ver(rowVersion); + } + const TEpoch Epoch; TPartCook Cook; }; @@ -52,6 +58,11 @@ namespace NTest { Cooker.Add(row, ERowOp::Upsert); } + void Ver(TRowVersion) override + { + Y_FAIL("unsupported"); + } + TCooker Cooker; }; @@ -77,8 +88,9 @@ namespace NTest { return cook.Add(Saved.begin(), Saved.end()).Finish(); } - TAutoPtr<TSubset> Mixed(ui32 frozen, ui32 flatten, THash hash) + TAutoPtr<TSubset> Mixed(ui32 frozen, ui32 flatten, THash hash, float history = 0) { + TMersenne<ui64> rnd(0); TDeque<TAutoPtr<IBand>> bands; for (auto it: xrange(flatten)) { @@ -90,8 +102,22 @@ namespace NTest { bands.emplace_back(new TMem(Scheme, TEpoch::FromIndex(bands.size()), it)); if (const auto slots = bands.size()) { - for (auto &row: Saved) - bands[hash(row) % slots]->Add(row); + for (auto &row: Saved) { + auto &band = bands[hash(row) % slots]; + if (history) { + for (ui64 txId = 10; txId; txId--) { + band->Ver({0, txId}); + // FIXME: change row data? + band->Add(row); + if (rnd.GenRandReal4() > history) { + // each row will have from 1 to 10 versions + break; + } + } + } else { + band->Add(row); + } + } } TAutoPtr<TSubset> subset = new TSubset(TEpoch::FromIndex(bands.size()), Scheme); diff --git a/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt index ee2423e7d4..9b99d9f774 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.darwin-x86_64.txt @@ -56,6 +56,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_sausage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_gen.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_shard.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_compaction.cpp diff --git a/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt b/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt index 24d732f728..4b6443427c 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.linux-aarch64.txt @@ -59,6 +59,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_sausage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_gen.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_shard.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_compaction.cpp diff --git a/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt b/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt index 449b23320c..4323c2f2a3 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.linux-x86_64.txt @@ -60,6 +60,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_sausage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_gen.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_shard.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_compaction.cpp diff --git a/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt b/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt index 2b3e5a8d7f..3252f6dd46 100644 --- a/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/core/tablet_flat/ut/CMakeLists.windows-x86_64.txt @@ -49,6 +49,7 @@ target_sources(ydb-core-tablet_flat-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_iterator.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_memtable.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_sausage.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_gen.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_comp_shard.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet_flat/ut/ut_compaction.cpp diff --git a/ydb/core/tablet_flat/ut/ut_stat.cpp b/ydb/core/tablet_flat/ut/ut_stat.cpp new file mode 100644 index 0000000000..906927fb8a --- /dev/null +++ b/ydb/core/tablet_flat/ut/ut_stat.cpp @@ -0,0 +1,104 @@ +#include "flat_stat_table.h" +#include <ydb/core/tablet_flat/test/libs/table/model/large.h> +#include <ydb/core/tablet_flat/test/libs/table/test_make.h> +#include <ydb/core/tablet_flat/test/libs/table/test_mixer.h> + +#include <library/cpp/testing/unittest/registar.h> + +namespace NKikimr::NTable { + +namespace { + using namespace NTest; + + NPage::TConf PageConf(size_t groups = 1) noexcept + { + NPage::TConf conf{ true, 2 * 1024 }; + + conf.Groups.resize(groups); + for (size_t group : xrange(groups)) { + conf.Group(group).IndexMin = 1024; /* Should cover index buffer grow code */ + } + conf.SmallEdge = 19; /* Packed to page collection large cell values */ + conf.LargeEdge = 29; /* Large values placed to single blobs */ + conf.SliceSize = conf.Group(0).PageSize * 4; + + return conf; + } + + const NTest::TMass Mass0(new NTest::TModelStd(false), 24000); + const NTest::TMass Mass1(new NTest::TModelStd(true), 24000); + + void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex) { + TStats stats; + TTestEnv testEnv; + UNIT_ASSERT(NTable::BuildStats(subset, stats, 310, 3105, &testEnv)); + + Cerr << "Stats: " << stats.RowCount << " " << stats.DataSize.Size << " " << stats.IndexSize.Size << " " << stats.DataSizeHistogram.size() << " " << stats.RowCountHistogram.size() << Endl; + UNIT_ASSERT_VALUES_EQUAL(stats.RowCount, expectedRows); + UNIT_ASSERT_VALUES_EQUAL(stats.DataSize.Size, expectedData); + UNIT_ASSERT_VALUES_EQUAL(stats.IndexSize.Size, expectedIndex); + } +} + +Y_UNIT_TEST_SUITE(BuildStats) { + using namespace NTest; + + Y_UNIT_TEST(Single) + { + auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size())).Mixed(0, 1, TMixerOne{ }); + Check(*subset, 24000, 2106439, 25272); + } + + Y_UNIT_TEST(Single_Groups) + { + auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size())).Mixed(0, 1, TMixerOne{ }); + Check(*subset, 24000, 2460139, 13170); + } + + Y_UNIT_TEST(Single_Groups_History) + { + auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size())).Mixed(0, 1, TMixerOne{ }, 0.3); + Check(*subset, 24000, 4054050, 18810); + } + + Y_UNIT_TEST(Mixed) + { + auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size())).Mixed(0, 4, TMixerRnd(4)); + Check(*subset, 24000, 2106459, 25428); + } + + Y_UNIT_TEST(Mixed_Groups) + { + auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size())).Mixed(0, 4, TMixerRnd(4)); + Check(*subset, 24000, 2460219, 13482); + } + + Y_UNIT_TEST(Mixed_Groups_History) + { + auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size())).Mixed(0, 4, TMixerRnd(4), 0.3); + Check(*subset, 24000, 4054270, 19152); + } + + Y_UNIT_TEST(Serial) + { + TMixerSeq mixer(4, Mass0.Saved.Size()); + auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size())).Mixed(0, 4, mixer); + Check(*subset, 24000, 2106459, 25428); + } + + Y_UNIT_TEST(Serial_Groups) + { + TMixerSeq mixer(4, Mass1.Saved.Size()); + auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size())).Mixed(0, 4, mixer); + Check(*subset, 24000, 2460259, 13528); + } + + Y_UNIT_TEST(Serial_Groups_History) + { + TMixerSeq mixer(4, Mass1.Saved.Size()); + auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size())).Mixed(0, 4, mixer, 0.3); + Check(*subset, 24000, 4054290, 19168); + } +} + +} diff --git a/ydb/core/tablet_flat/ut/ya.make b/ydb/core/tablet_flat/ut/ya.make index b26924453a..d1b143bc1a 100644 --- a/ydb/core/tablet_flat/ut/ya.make +++ b/ydb/core/tablet_flat/ut/ya.make @@ -32,6 +32,7 @@ SRCS( ut_iterator.cpp ut_memtable.cpp ut_sausage.cpp + ut_stat.cpp ut_comp_gen.cpp ut_comp_shard.cpp ut_compaction.cpp diff --git a/ydb/core/tx/datashard/datashard__stats.cpp b/ydb/core/tx/datashard/datashard__stats.cpp index 4b439de050..8329472503 100644 --- a/ydb/core/tx/datashard/datashard__stats.cpp +++ b/ydb/core/tx/datashard/datashard__stats.cpp @@ -2,15 +2,93 @@ #include <ydb/core/tablet/resource_broker.h> #include <ydb/core/tablet_flat/flat_stat_table.h> #include <ydb/core/tablet_flat/flat_dbase_sz_env.h> +#include "ydb/core/tablet_flat/shared_sausagecache.h" namespace NKikimr { namespace NDataShard { using namespace NResourceBroker; +using namespace NTable; + +class TStatsEnv : public IPages { + struct TPartPages { + TIntrusiveConstPtr<NPageCollection::IPageCollection> PageCollection; + THashMap<TPageId, TSharedData> Pages; + THashSet<TPageId> NeedPages; + }; + +public: + TResult Locate(const TMemTable*, ui64, ui32) noexcept override + { + Y_FAIL("IPages::Locate(TMemTable*, ...) shouldn't be used here"); + } + + TResult Locate(const TPart*, ui64, ELargeObj) noexcept override + { + Y_FAIL("IPages::Locate(TPart*, ...) shouldn't be used here"); + } + + const TSharedData* TryGetPage(const TPart* part, TPageId pageId, TGroupId groupId) override + { + Y_VERIFY(groupId.IsMain(), "Unsupported column group"); + + auto *partStore = CheckedCast<const TPartStore*>(part); + auto *info = partStore->PageCollections.at(groupId.Index).Get(); + Y_VERIFY(EPage(info->PageCollection->Page(pageId).Type) == EPage::Index); + + if (auto *partPages = Parts.FindPtr(part)) { + if (auto *page = partPages->Pages.FindPtr(pageId)) { + return page; + } else if (partPages->NeedPages.insert(pageId).second) { + Pending++; + } + } else { + Parts.emplace(part, TPartPages{ + .PageCollection = info->PageCollection, + .NeedPages = {pageId}}); + Pending++; + } + + return nullptr; + } + + ui64 GetPending() { + return Pending; + } + + TVector<TAutoPtr<NPageCollection::TFetch>> GetFetches() + { + TVector<TAutoPtr<NPageCollection::TFetch>> result; + for (auto &part : Parts) { + auto &needPages = part.second.NeedPages; + if (needPages) { + TVector<TPageId> pages(needPages.begin(), needPages.end()); + std::sort(pages.begin(), pages.end()); + result.push_back(new NPageCollection::TFetch{ (ui64)part.first, part.second.PageCollection, std::move(pages) }); + } + } + + return result; + } + + void Save(ui64 cookie, TPageId pageId, TSharedData data) noexcept + { + if (auto* partPages = Parts.FindPtr((TPart*)cookie)) { + if (partPages->NeedPages.erase(pageId)) { + partPages->Pages.emplace(pageId, std::move(data)); + Pending--; + } + } + } + +private: + ui64 Pending = 0; + THashMap<const TPart*, TPartPages> Parts; +}; class TAsyncTableStatsBuilder : public TActorBootstrapped<TAsyncTableStatsBuilder> { public: - TAsyncTableStatsBuilder(TActorId replyTo, ui64 tabletId, ui64 tableId, ui64 indexSize, const TAutoPtr<NTable::TSubset> subset, + TAsyncTableStatsBuilder(TActorId replyTo, ui64 tabletId, ui64 tableId, ui64 indexSize, const TAutoPtr<TSubset> subset, ui64 memRowCount, ui64 memDataSize, ui64 rowCountResolution, ui64 dataSizeResolution, ui64 searchHeight, TInstant statsUpdateTime) : ReplyTo(replyTo) @@ -31,7 +109,7 @@ public: } void Bootstrap(const TActorContext& ctx) { - SubmitTask(ctx); + SubmitWaitResourcesTask(ctx); Become(&TThis::StateWaitResource); } @@ -41,7 +119,7 @@ private: TActorBootstrapped::Die(ctx); } - void SubmitTask(const TActorContext& ctx) { + void SubmitWaitResourcesTask(const TActorContext& ctx) { ctx.Send(MakeResourceBrokerID(), new TEvResourceBroker::TEvSubmitTask( /* task id */ 1, @@ -64,14 +142,22 @@ private: } } + STFUNC(StateWaitPages) { + switch (ev->GetTypeRewrite()) { + SFunc(TEvents::TEvPoison, Die); + HFunc(TEvResourceBroker::TEvResourceAllocated, Handle); + HFunc(NSharedCache::TEvResult, Handle); + } + } + void Handle(TEvResourceBroker::TEvResourceAllocated::TPtr& ev, const TActorContext& ctx) { auto* msg = ev->Get(); Y_VERIFY(!msg->Cookie.Get(), "Unexpected cookie in TEvResourceAllocated"); Y_VERIFY(msg->TaskId == 1, "Unexpected task id in TEvResourceAllocated"); - Start(ctx); + TryBuildStats(ctx); } - void Start(const TActorContext& ctx) { + void TryBuildStats(const TActorContext& ctx) { THolder<TDataShard::TEvPrivate::TEvAsyncTableStats> ev = MakeHolder<TDataShard::TEvPrivate::TEvAsyncTableStats>(); ev->TableId = TableId; ev->IndexSize = IndexSize; @@ -81,27 +167,69 @@ private: ev->MemDataSize = MemDataSize; ev->SearchHeight = SearchHeight; - NTable::GetPartOwners(*Subset, ev->PartOwners); + GetPartOwners(*Subset, ev->PartOwners); - NTable::TSizeEnv szEnv; Subset->ColdParts.clear(); // stats won't include cold parts, if any - NTable::BuildStats(*Subset, ev->Stats, RowCountResolution, DataSizeResolution, &szEnv); - Y_VERIFY_DEBUG(IndexSize == ev->Stats.IndexSize.Size); - ctx.Send(ReplyTo, ev.Release()); + if (BuildStats(*Subset, ev->Stats, RowCountResolution, DataSizeResolution, &Env)) { + Y_VERIFY_DEBUG(IndexSize == ev->Stats.IndexSize.Size); + ctx.Send(ReplyTo, ev.Release()); + + FinishTask(ctx); + + return Die(ctx); + } + + // page fault has happened, request needed pages + // graceful continuation is not supported, BuildStats will be restarted + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Stats build at datashard " << TabletId << ", for tableId " << TableId << + " needs to load " << Env.GetPending() << " pages"); + + auto fetches = Env.GetFetches(); + Y_VERIFY(fetches); + for (auto &fetch : fetches) { + ctx.Send(MakeSharedPageCacheId(), new NSharedCache::TEvRequest(NSharedCache::EPriority::Bkgr, std::move(fetch), SelfId())); + } + + // release resources while waiting pages FinishTask(ctx); + Become(&TThis::StateWaitPages); + } - return Die(ctx); + void Handle(NSharedCache::TEvResult::TPtr& ev, const TActorContext& ctx) noexcept + { + auto& msg = *ev->Get(); + + if (msg.Status != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::TX_DATASHARD, "Stats build failed at datashard " << TabletId << ", for tableId " << TableId + << " requested pages but got " << msg.Status); + return Die(ctx); + } + + for (auto& loaded : msg.Loaded) { + Env.Save(msg.Cookie, loaded.PageId, TPinnedPageRef(loaded.Page).GetData()); + } + + if (Env.GetPending()) { + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Stats build at datashard " << TabletId << ", for tableId " << TableId << + " needs to load " << Env.GetPending() << " more pages"); + } else { + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Stats build at datashard " << TabletId << ", for tableId " << TableId << + " got all needed pages, continue"); + SubmitWaitResourcesTask(ctx); + Become(&TThis::StateWaitResource); + } } private: + TStatsEnv Env; TActorId ReplyTo; ui64 TabletId; ui64 TableId; ui64 IndexSize; TInstant StatsUpdateTime; - TAutoPtr<NTable::TSubset> Subset; + TAutoPtr<TSubset> Subset; ui64 MemRowCount; ui64 MemDataSize; ui64 RowCountResolution; @@ -163,7 +291,7 @@ public: if (!ready) return true; - const NTable::TStats& stats = tableInfo.Stats.DataStats; + const TStats& stats = tableInfo.Stats.DataStats; Result->Record.MutableTableStats()->SetDataSize(stats.DataSize.Size + memSize); Result->Record.MutableTableStats()->SetRowCount(stats.RowCount + memRowCount); FillHistogram(stats.DataSizeHistogram, *Result->Record.MutableTableStats()->MutableDataSizeHistogram()); @@ -195,7 +323,7 @@ public: } private: - static void FillHistogram(const NTable::THistogram& h, NKikimrTableStats::THistogram& pb) { + static void FillHistogram(const THistogram& h, NKikimrTableStats::THistogram& pb) { for (auto& b : h) { auto bucket = pb.AddBuckets(); bucket->SetKey(b.EndKey); @@ -203,7 +331,7 @@ private: } } - static void FillKeyAccessSample(const NTable::TKeyAccessSample& s, NKikimrTableStats::THistogram& pb) { + static void FillKeyAccessSample(const TKeyAccessSample& s, NKikimrTableStats::THistogram& pb) { for (const auto& k : s.GetSample()) { auto bucket = pb.AddBuckets(); bucket->SetKey(k.first); @@ -375,7 +503,7 @@ public: indexSize += txc.DB.GetTableIndexSize(shadowTableId); } - TAutoPtr<NTable::TSubset> subsetForStats = txc.DB.Subset(localTableId, NTable::TEpoch::Max(), NTable::TRawVals(), NTable::TRawVals()); + TAutoPtr<TSubset> subsetForStats = txc.DB.Subset(localTableId, TEpoch::Max(), { }, { }); // Remove memtables from the subset as we only want to look at indexes for parts subsetForStats->Frozen.clear(); @@ -384,7 +512,7 @@ public: // It's only safe to do as long as stats collector performs // index lookups only, and doesn't care about the actual lsm // part order. - auto shadowSubset = txc.DB.Subset(shadowTableId, NTable::TEpoch::Max(), { }, { }); + auto shadowSubset = txc.DB.Subset(shadowTableId, TEpoch::Max(), { }, { }); subsetForStats->Flatten.insert( subsetForStats->Flatten.end(), shadowSubset->Flatten.begin(), @@ -414,8 +542,8 @@ public: Self->SysTablesPartOnwers.clear(); for (ui32 sysTableId : Self->SysTablesToTransferAtSplit) { THashSet<ui64> sysPartOwners; - auto subset = txc.DB.Subset(sysTableId, NTable::TEpoch::Max(), { }, { }); - NTable::GetPartOwners(*subset, sysPartOwners); + auto subset = txc.DB.Subset(sysTableId, TEpoch::Max(), { }, { }); + GetPartOwners(*subset, sysPartOwners); Self->SysTablesPartOnwers.insert(sysPartOwners.begin(), sysPartOwners.end()); } return true; diff --git a/ydb/core/tx/datashard/datashard_ut_stats.cpp b/ydb/core/tx/datashard/datashard_ut_stats.cpp index c1f77ee246..58f4d074af 100644 --- a/ydb/core/tx/datashard/datashard_ut_stats.cpp +++ b/ydb/core/tx/datashard/datashard_ut_stats.cpp @@ -83,6 +83,7 @@ Y_UNIT_TEST_SUITE(DataShardStats) { auto sender = runtime.AllocateEdgeActor(); runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::TABLET_SAUSAGECACHE, NLog::PRI_TRACE); InitRoot(server, sender); |