diff options
author | zalyalov <zalyalov@yandex-team.com> | 2023-08-21 13:07:54 +0300 |
---|---|---|
committer | zalyalov <zalyalov@yandex-team.com> | 2023-08-21 15:02:53 +0300 |
commit | 4cd60db82ef163d1c9e25435024f3289f12e0930 (patch) | |
tree | 27f7e222ea084edb38feaad04be78d83caa13bfe | |
parent | b8963aaeed6a55ebd44d69d3447825aec7ce4a8e (diff) | |
download | ydb-4cd60db82ef163d1c9e25435024f3289f12e0930.tar.gz |
Add option to continue booting tablets when a node hits MaxTabletsScheduled (KIKIMR-18947)
-rw-r--r-- | ydb/core/mind/hive/hive_impl.cpp | 12 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_impl.h | 4 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_ut.cpp | 27 | ||||
-rw-r--r-- | ydb/core/protos/config.proto | 6 |
4 files changed, 47 insertions, 2 deletions
diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index bb2f90d36cb..6a1c0849ad2 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -1215,6 +1215,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { } TVector<TSelectedNode> selectedNodes; + bool thereAreNodesWithManyStarts = false; for (auto itCandidateNodes = candidateGroups.begin(); itCandidateNodes != candidateGroups.end(); ++itCandidateNodes) { const std::vector<TNodeInfo*>& candidateNodes(*itCandidateNodes); @@ -1240,8 +1241,11 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { } } else { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " node " << nodeInfo.Id << " is not able to schedule the tablet"); - tablet.BootState = BootStateTooManyStarting; - return TBestNodeResult(false); + thereAreNodesWithManyStarts = true; + if (GetBootStrategy() == NKikimrConfig::THiveConfig::HIVE_BOOT_STRATEGY_BALANCED) { + tablet.BootState = BootStateTooManyStarting; + return TBestNodeResult(false); + } } } else { BLOG_TRACE("[FBN] Node " << nodeInfo.Id << " is not allowed" @@ -1256,6 +1260,10 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { } } BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected nodes count " << selectedNodes.size()); + if (selectedNodes.empty() && thereAreNodesWithManyStarts) { + BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " all available nodes are booting too many tablets"); + return TBestNodeResult(false); + } TNodeInfo* selectedNode = nullptr; if (!selectedNodes.empty()) { diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index a26b4b94228..4ab7ff5bd15 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -830,6 +830,10 @@ public: return CurrentConfig.GetNodeRestartsToIgnoreInWarmup(); } + NKikimrConfig::THiveConfig::EHiveBootStrategy GetBootStrategy() const { + return 
CurrentConfig.GetBootStrategy(); + } + static void ActualizeRestartStatistics(google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier); static bool IsSystemTablet(TTabletTypes::EType type); diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index 5861ffbbe0c..c505ae5aee8 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -5532,6 +5532,33 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT_VALUES_EQUAL(nodeTablets[4], 0); UNIT_ASSERT_VALUES_EQUAL(nodeTablets[5], NUM_TABLETS); } + + Y_UNIT_TEST(TestProgressWithMaxTabletsScheduled) { + TTestBasicRuntime runtime(2, false); + + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetMaxTabletsScheduled(1); + app.HiveConfig.SetBootStrategy(NKikimrConfig::THiveConfig::HIVE_BOOT_STRATEGY_FAST); + }); + + const ui64 hiveTablet = MakeDefaultHiveID(0); + const ui64 testerTablet = MakeDefaultHiveID(1); + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + TVector<ui64> tablets; + for (int i = 0; i < 10; ++i) { + THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + tablets.emplace_back(tabletId); + }; + + SendKillLocal(runtime, 0); + for (auto tablet : tablets) { + WaitForTabletIsUp(runtime, tablet, 1); + } + } } } diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index d7aed47fad4..b72face3ebc 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1666,6 +1666,11 @@ message THiveConfig { HIVE_NODE_SELECT_STRATEGY_RANDOM = 3; } + enum EHiveBootStrategy { + HIVE_BOOT_STRATEGY_BALANCED = 0; + HIVE_BOOT_STRATEGY_FAST = 1; + } + optional uint64 MaxTabletsScheduled = 2 [default = 100]; optional uint64 
MaxResourceCounter = 3 [default = 100000000]; optional uint64 MaxResourceCPU = 4 [default = 10000000]; @@ -1723,6 +1728,7 @@ message THiveConfig { optional uint64 MaxMovementsOnEmergencyBalancer = 57 [default = 2]; optional bool ContinueEmergencyBalancer = 58 [default = true]; optional double MinPeriodBetweenEmergencyBalance = 59 [default = 0.1]; // seconds + optional EHiveBootStrategy BootStrategy = 60 [default = HIVE_BOOT_STRATEGY_BALANCED]; } message TDataShardConfig { |