diff options
author | kungasc <kungasc@yandex-team.com> | 2023-10-02 15:56:53 +0300 |
---|---|---|
committer | kungasc <kungasc@yandex-team.com> | 2023-10-02 16:32:35 +0300 |
commit | dda26179eda44bc3694bcfe1e61554baa2f44b72 (patch) | |
tree | d6a64d933420909dfe865100375aba4a0ff5fd2e | |
parent | 545f15ba85d826d4698a367c17608999d6c278e7 (diff) | |
download | ydb-dda26179eda44bc3694bcfe1e61554baa2f44b72.tar.gz |
Made sticky pages logic consistent for loading and compactions
-rw-r--r-- | ydb/core/tablet_flat/flat_dbase_apply.cpp | 2 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_dbase_scheme.h | 7 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_executor.cpp | 54 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_executor.h | 3 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_executor_ut.cpp | 344 | ||||
-rw-r--r-- | ydb/core/tablet_flat/flat_part_store.h | 15 |
6 files changed, 399 insertions, 26 deletions
diff --git a/ydb/core/tablet_flat/flat_dbase_apply.cpp b/ydb/core/tablet_flat/flat_dbase_apply.cpp index 5ae145e7dc3..d6584f8ac23 100644 --- a/ydb/core/tablet_flat/flat_dbase_apply.cpp +++ b/ydb/core/tablet_flat/flat_dbase_apply.cpp @@ -84,6 +84,7 @@ bool TSchemeModifier::Apply(const TAlterRecord &delta) Y_VERIFY(ui32(codec) <= 1, "Invalid page encoding code value"); + // FIXME: for now these changes will affect old parts on boot only (see RequestInMemPagesForPartStore) bool ever = delta.HasInMemory() && delta.GetInMemory(); auto cache = ever ? ECache::Ever : family.Cache; @@ -174,6 +175,7 @@ bool TSchemeModifier::AddColumnToFamily(ui32 tid, ui32 cid, ui32 family) if (column->Family != family) { PreserveTable(tid); + // FIXME: for now ECache::Ever setting will affect old parts on boot only (see RequestInMemPagesForPartStore) column->Family = family; return true; } diff --git a/ydb/core/tablet_flat/flat_dbase_scheme.h b/ydb/core/tablet_flat/flat_dbase_scheme.h index f821b0f998a..668fe9ecbae 100644 --- a/ydb/core/tablet_flat/flat_dbase_scheme.h +++ b/ydb/core/tablet_flat/flat_dbase_scheme.h @@ -168,13 +168,6 @@ public: return nullptr; } - ECache CachePolicy(ui32 id) const noexcept - { - auto *family = DefaultFamilyFor(id); - - return family ? family->Cache : ECache::None; - } - ECompactionStrategy CompactionStrategyFor(ui32 id) const noexcept { if (auto *table = GetTableInfo(id)) { diff --git a/ydb/core/tablet_flat/flat_executor.cpp b/ydb/core/tablet_flat/flat_executor.cpp index bc2c81c8e30..be0288fc3ff 100644 --- a/ydb/core/tablet_flat/flat_executor.cpp +++ b/ydb/core/tablet_flat/flat_executor.cpp @@ -618,12 +618,12 @@ void TExecutor::TranslateCacheTouchesToSharedCache() { void TExecutor::RequestInMemPagesForDatabase() { const auto &scheme = Scheme(); for (auto &sxpair : scheme.Tables) { - // should be over page collection cache with already set inmem flags? - if (scheme.CachePolicy(sxpair.first) == NTable::NPage::ECache::Ever) { + auto stickyColumns = GetStickyColumns(sxpair.first); + if (stickyColumns) { auto subset = Database->Subset(sxpair.first, NTable::TEpoch::Max(), { } , { }); for (auto &partView: subset->Flatten) - RequestInMemPagesForPartStore(sxpair.first, partView); + RequestInMemPagesForPartStore(sxpair.first, partView, stickyColumns); } } } @@ -1269,26 +1269,58 @@ bool TExecutor::ApplyReadyPartSwitches() { return true; } -void TExecutor::RequestInMemPagesForPartStore(ui32 tableId, const NTable::TPartView &partView) { - if (Scheme().CachePolicy(tableId) == NTable::NPage::ECache::Ever) { - auto req = partView.As<NTable::TPartStore>()->DataPages(); +void TExecutor::RequestInMemPagesForPartStore(ui32 tableId, const NTable::TPartView &partView, const THashSet<NTable::TTag> &stickyColumns) { + Y_VERIFY_DEBUG(stickyColumns); - TPrivatePageCache::TInfo *info = PrivatePageCache->Info(req->PageCollection->Label()); - for (ui32 pageId : req->Pages) - PrivatePageCache->MarkSticky(pageId, info); + auto rowScheme = RowScheme(tableId); - RequestFromSharedCache(req, NBlockIO::EPriority::Bkgr, EPageCollectionRequest::CacheSync); + for (size_t groupIndex : xrange(partView->GroupsCount)) { + bool stickyGroup = false; + for (const auto &column : partView->Scheme->Groups[groupIndex].Columns) { + if (stickyColumns.contains(column.Tag)) { + stickyGroup = true; + break; + } + } + + if (stickyGroup) { + auto req = partView.As<NTable::TPartStore>()->GetPages(groupIndex); + + TPrivatePageCache::TInfo *info = PrivatePageCache->Info(req->PageCollection->Label()); + for (ui32 pageId : req->Pages) + PrivatePageCache->MarkSticky(pageId, info); + + RequestFromSharedCache(req, NBlockIO::EPriority::Bkgr, EPageCollectionRequest::CacheSync); + } } } +THashSet<NTable::TTag> TExecutor::GetStickyColumns(ui32 tableId) { + auto *tableInfo = Scheme().GetTableInfo(tableId); + + THashSet<NTable::TTag> stickyColumns; + for (const auto &column : tableInfo->Columns) { + const auto* family = tableInfo->Families.FindPtr(column.second.Family); + if (family && family->Cache == NTable::NPage::ECache::Ever) { + stickyColumns.insert(column.first); + } + } + + return stickyColumns; +} + void TExecutor::ApplyExternalPartSwitch(TPendingPartSwitch &partSwitch) { TVector<NTable::TPartView> newParts; newParts.reserve(partSwitch.NewBundles.size()); + auto stickyColumns = GetStickyColumns(partSwitch.TableId); + for (auto &bundle : partSwitch.NewBundles) { auto* stage = bundle.GetStage<TPendingPartSwitch::TResultStage>(); Y_VERIFY(stage && stage->PartView, "Missing bundle result in part switch"); AddCachesOfBundle(stage->PartView); - RequestInMemPagesForPartStore(partSwitch.TableId, stage->PartView); + if (stickyColumns) { + RequestInMemPagesForPartStore(partSwitch.TableId, stage->PartView, stickyColumns); + } newParts.push_back(std::move(stage->PartView)); } diff --git a/ydb/core/tablet_flat/flat_executor.h b/ydb/core/tablet_flat/flat_executor.h index ae49cb47100..79b2b838bdb 100644 --- a/ydb/core/tablet_flat/flat_executor.h +++ b/ydb/core/tablet_flat/flat_executor.h @@ -522,7 +522,8 @@ class TExecutor void TranslateCacheTouchesToSharedCache(); void RequestInMemPagesForDatabase(); - void RequestInMemPagesForPartStore(ui32 tableId, const NTable::TPartView &partView); + void RequestInMemPagesForPartStore(ui32 tableId, const NTable::TPartView &partView, const THashSet<NTable::TTag> &stickyColumns); + THashSet<NTable::TTag> GetStickyColumns(ui32 tableId); void RequestFromSharedCache(TAutoPtr<NPageCollection::TFetch> fetch, NBlockIO::EPriority way, EPageCollectionRequest requestCategory); THolder<TScanSnapshot> PrepareScanSnapshot(ui32 table, diff --git a/ydb/core/tablet_flat/flat_executor_ut.cpp b/ydb/core/tablet_flat/flat_executor_ut.cpp index b009c258d38..209fb0fff82 100644 --- a/ydb/core/tablet_flat/flat_executor_ut.cpp +++ b/ydb/core/tablet_flat/flat_executor_ut.cpp @@ -5054,5 +5054,349 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorIndexLoading) { } +Y_UNIT_TEST_SUITE(TFlatTableExecutorStickyPages) { + using EReady = NTable::EReady; + using ENext = NTable::ENext; + + struct TTxFullScan : public ITransaction { + int& FailedAttempts; + + TTxFullScan(int& failedAttempts) + : FailedAttempts(failedAttempts) + { + FailedAttempts = 0; + } + + bool Execute(TTransactionContext &txc, const TActorContext &) override + { + TVector<NTable::TTag> tags{ { TRowsModel::ColumnKeyId, TRowsModel::ColumnValueId } }; + + auto iter = txc.DB.IterateRange(TRowsModel::TableId, { }, tags, {2, 0}); + while (iter->Next(ENext::Data) == EReady::Data) { + // iterate over all rows + } + + if (iter->Last() != EReady::Page) { + return true; + } + FailedAttempts++; + return false; + } + + void Complete(const TActorContext &ctx) override + { + ctx.Send(ctx.SelfID, new NFake::TEvReturn); + } + }; + + struct TTxKeepFamilyInMemory : public ITransaction { + ui32 Family; + + TTxKeepFamilyInMemory(ui32 family) + : Family(family) + { } + + bool Execute(TTransactionContext &txc, const TActorContext &) override + { + using namespace NTable::NPage; + + txc.DB.Alter().SetFamily(TRowsModel::TableId, Family, ECache::Ever, ECodec::Plain); + + return true; + } + + void Complete(const TActorContext &ctx) override + { + ctx.Send(ctx.SelfID, new NFake::TEvReturn); + } + }; + + struct TTxAddFamily : public ITransaction { + bool Execute(TTransactionContext &txc, const TActorContext &) override + { + using namespace NTable::NPage; + + txc.DB.Alter() + .SetFamily(TRowsModel::TableId, TRowsModel::AltFamilyId, ECache::None, ECodec::Plain) + .AddColumnToFamily(TRowsModel::TableId, TRowsModel::ColumnValueId, TRowsModel::AltFamilyId); + + return true; + } + + void Complete(const TActorContext &ctx) override + { + ctx.Send(ctx.SelfID, new NFake::TEvReturn); + } + }; + + void ZeroSharedCache(TMyEnvBase &env) { + env.Env.GetMemObserver()->NotifyStat({1, 1, 1}); + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(NSharedCache::EvMem, 1)); + env->DispatchEvents(options); + } + + Y_UNIT_TEST(TestNonSticky) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy())); + + // 10 history pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 10 data pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 20); // data pages only, 2 indexes are sticky + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // should have the same behaviour + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 20); + } + + Y_UNIT_TEST(TestSticky) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy())); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(0) }); + + // 10 history pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 10 data pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // should have the same behaviour + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); + } + + Y_UNIT_TEST(TestNonStickyGroup) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy(), true)); + + // 1 historic[0] + 10 historic[1] pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 1 groups[0] + 10 groups[1] pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 12); // 1 groups[0], 1 historic[0], 10 historic[1] pages, 4 indexes are sticky + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // should have the same behaviour + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 12); + } + + Y_UNIT_TEST(TestStickyMain) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy(), true)); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(0) }); + + // 1 historic[0] + 10 historic[1] pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 1 groups[0] + 10 groups[1] pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 10); // 10 historic[1] pages + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // should have the same behaviour + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 10); + } + + Y_UNIT_TEST(TestStickyAlt) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy(), true)); + + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(TRowsModel::AltFamilyId) }); + + // 1 historic[0] + 10 historic[1] pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 1 groups[0] + 10 groups[1] pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 2); // 1 groups[0], 1 historic[0], 4 indexes are sticky + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // should have the same behaviour + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 2); + } + + Y_UNIT_TEST(TestStickyAll) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy(), true)); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(0) }); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(TRowsModel::AltFamilyId) }); + + // 1 historic[0] + 10 historic[1] pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 1 groups[0] + 10 groups[1] pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // should have the same behaviour + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); + } + + Y_UNIT_TEST(TestAlterAddFamilySticky) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy())); + + // 10 historic[0] pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 10 groups[0] pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + // add family, old parts won't have it + env.SendSync(new NFake::TEvExecute{ new TTxAddFamily() }); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(0) }); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(TRowsModel::AltFamilyId) }); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 20); // old parts aren't sticky before restart + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); // all old columns were made sticky, load them on start + } + + Y_UNIT_TEST(TestAlterAddFamilyPartiallySticky) { + TMyEnvBase env; + TRowsModel rows; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + ZeroSharedCache(env); + + env.SendSync(rows.MakeScheme(new TCompactionPolicy())); + + // 10 historic[0] pages + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(70, 950)); + + // 10 groups[0] pages + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(70, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor<NFake::TEvCompacted>(); + + // add family, old parts won't have it + env.SendSync(new NFake::TEvExecute{ new TTxAddFamily() }); + env.SendSync(new NFake::TEvExecute{ new TTxKeepFamilyInMemory(0) }); + + int failedAttempts = 0; + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 20); // old parts aren't sticky before restart + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); // if at least one family of a group is for memory load it + } +} + } // namespace NTabletFlatExecutor } // namespace NKikimr diff --git a/ydb/core/tablet_flat/flat_part_store.h b/ydb/core/tablet_flat/flat_part_store.h index b9a6c73d4a8..6ea44f07837 100644 --- a/ydb/core/tablet_flat/flat_part_store.h +++ b/ydb/core/tablet_flat/flat_part_store.h @@ -128,17 +128,18 @@ public: return (lob == ELargeObj::Extern ? Pseudo : PageCollections.at(GroupsCount)).Get(); } - TAutoPtr<NPageCollection::TFetch> DataPages() const noexcept + TAutoPtr<NPageCollection::TFetch> GetPages(ui32 room) const noexcept { - TVector<TPageId> pages; + Y_VERIFY(room < PageCollections.size()); - pages.reserve(Index->End() - Index->Begin()); + auto total = PageCollections[room]->PageCollection->Total(); - auto it = Index.LookupKey({ }, Scheme->Groups[0], ESeek::Lower, nullptr); - - for (; it; ++it) pages.emplace_back(it->GetPageId()); + TVector<TPageId> pages(total); + for (size_t i : xrange(total)) { + pages[i] = i; + } - return new NPageCollection::TFetch{ 0, PageCollections[0]->PageCollection , std::move(pages) }; + return new NPageCollection::TFetch{ 0, PageCollections[room]->PageCollection, std::move(pages) }; } static TVector<TIntrusivePtr<TCache>> Construct(TVector<TPageCollectionComponents> components) noexcept |