diff options
| author | Sergey Belyakov <[email protected]> | 2024-03-07 12:00:00 +0300 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-03-07 12:00:00 +0300 |
| commit | 9235b5deda77c5ac20a5876aa3d01301a95f60e9 (patch) | |
| tree | d8aed654a786d02fd41071a63c1c2c810de9642e | |
| parent | d9db5d6a97386e9f7db96f9ed1a16ac07df95cd9 (diff) | |
Add DiskTimeAvailable scaling option to BlobStorage configuration, improve burst threshold configuration (#2530)
13 files changed, 99 insertions, 35 deletions
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp index 781dd6070fb..eeea89001a2 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp @@ -176,6 +176,19 @@ namespace NKikimr::NStorage { vdiskConfig->EnableVPatch = EnableVPatch; vdiskConfig->FeatureFlags = Cfg->FeatureFlags; + if (Cfg->BlobStorageConfig.HasCostMetricsSettings()) { + for (auto type : Cfg->BlobStorageConfig.GetCostMetricsSettings().GetVDiskTypes()) { + if (type.HasPDiskType() && deviceType == PDiskTypeToPDiskType(type.GetPDiskType())) { + if (type.HasBurstThresholdNs()) { + vdiskConfig->BurstThresholdNs = type.GetBurstThresholdNs(); + } + if (type.HasDiskTimeAvailableScale()) { + vdiskConfig->DiskTimeAvailableScale = type.GetDiskTimeAvailableScale(); + } + } + } + } + // issue initial report to whiteboard before creating actor to avoid races Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(vdiskId, groupInfo->GetStoragePoolName(), vslotId.PDiskId, vslotId.VDiskSlotId, pdiskGuid, kind, donorMode, whiteboardInstanceGuid, std::move(donors))); diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h index 8be7af96c4d..c774c235035 100644 --- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h +++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h @@ -40,7 +40,7 @@ struct TEnvironmentSetup { const TFeatureFlags FeatureFlags; const NPDisk::EDeviceType DiskType = NPDisk::EDeviceType::DEVICE_TYPE_NVME; const ui32 BurstThresholdNs = 0; - const TString VDiskKind = ""; + const float DiskTimeAvailableScale = 1; }; const TSettings Settings; @@ -325,16 +325,16 @@ struct TEnvironmentSetup { config->CacheAccessor = std::make_unique<TAccessor>(Cache[nodeId]); } config->FeatureFlags = Settings.FeatureFlags; - if (Settings.VDiskKind) { - NKikimrBlobStorage::TAllVDiskKinds vdiskConfig; - auto* kind = vdiskConfig.AddVDiskKinds(); - kind->SetKind(NKikimrBlobStorage::TVDiskKind::Test1); + + { + auto* type = config->BlobStorageConfig.MutableCostMetricsSettings()->AddVDiskTypes(); + type->SetPDiskType(NKikimrBlobStorage::EPDiskType::ROT); if (Settings.BurstThresholdNs) { - kind->MutableConfig()->SetBurstThresholdNs(Settings.BurstThresholdNs); + type->SetBurstThresholdNs(Settings.BurstThresholdNs); } - - config->AllVDiskKinds = MakeIntrusive<TAllVDiskKinds>(vdiskConfig); + type->SetDiskTimeAvailableScale(Settings.DiskTimeAvailableScale); } + warden.reset(CreateBSNodeWarden(config)); } @@ -419,11 +419,7 @@ struct TEnvironmentSetup { cmd2->SetName(StoragePoolName); cmd2->SetKind(StoragePoolName); cmd2->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure())); - if (Settings.VDiskKind) { - cmd2->SetVDiskKind(Settings.VDiskKind); - } else { - cmd2->SetVDiskKind("Default"); - } + cmd2->SetVDiskKind("Default"); cmd2->SetNumGroups(numGroups ? numGroups : NumGroups); cmd2->AddPDiskFilter()->AddProperty()->SetType(pdiskType); if (Settings.Encryption) { @@ -443,11 +439,7 @@ struct TEnvironmentSetup { cmd->SetName(poolName); cmd->SetKind(poolName); cmd->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure())); - if (Settings.VDiskKind) { - cmd->SetVDiskKind(Settings.VDiskKind); - } else { - cmd->SetVDiskKind("Default"); - } + cmd->SetVDiskKind("Default"); cmd->SetNumGroups(1); cmd->AddPDiskFilter()->AddProperty()->SetType(NKikimrBlobStorage::EPDiskType::ROT); if (Settings.Encryption) { diff --git a/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp b/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp index c4e3d42f770..db60b5eef96 100644 --- a/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp +++ b/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp @@ -34,7 +34,7 @@ ui64 AggregateVDiskCounters(std::unique_ptr<TEnvironmentSetup>& env, TString sto void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr<TEnvironmentSetup>& env, ui32& groupSize, TBlobStorageGroupType& groupType, ui32& groupId, std::vector<ui32>& pdiskLayout, - ui32 burstThresholdNs = 0, TString vdiskKind = "") { + ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) { groupSize = topology.TotalVDisks; groupType = topology.GType; env.reset(new TEnvironmentSetup({ @@ -42,7 +42,7 @@ void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr< .Erasure = groupType, .DiskType = NPDisk::EDeviceType::DEVICE_TYPE_ROT, .BurstThresholdNs = burstThresholdNs, - .VDiskKind = vdiskKind, + .DiskTimeAvailableScale = diskTimeAvailableScale, })); env->CreateBoxAndPool(1, 1); @@ -248,7 +248,7 @@ enum class ELoadDistribution : ui8 { template <typename TInflightActor> void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution loadDistribution, - ui32 burstThresholdNs = 0) { + ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) { TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true); auto* actor = new TInflightActor({requests, inflight, delay}, 8_MB); std::unique_ptr<TEnvironmentSetup> env; @@ -256,7 +256,8 @@ void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution TBlobStorageGroupType groupType; ui32 groupId; std::vector<ui32> pdiskLayout; - SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs, "Test1"); + SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs, + diskTimeAvailableScale); actor->SetGroupId(groupId); env->Runtime->Register(actor, 1); @@ -286,4 +287,33 @@ Y_UNIT_TEST_SUITE(BurstDetection) { } } +void TestDiskTimeAvailableScaling() { + auto measure = [](float scale) { + TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true); + std::unique_ptr<TEnvironmentSetup> env; + ui32 groupSize; + TBlobStorageGroupType groupType; + ui32 groupId; + std::vector<ui32> pdiskLayout; + SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, 0, scale); + + return AggregateVDiskCounters(env, env->StoragePoolName, groupSize, groupId, pdiskLayout, + "advancedCost", "DiskTimeAvailable"); + }; + + i64 test1 = measure(1); + i64 test2 = measure(2); + + i64 delta = test1 * 2 - test2; + + UNIT_ASSERT_LE_C(std::abs(delta), 10, "Total time available: with scale=1 time=" << test1 << + ", with scale=2 time=" << test2); +} + +Y_UNIT_TEST_SUITE(DiskTimeAvailable) { + Y_UNIT_TEST(Scaling) { + TestDiskTimeAvailableScaling(); + } +} + #undef MAKE_BURST_TEST diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp index 188fd70c052..f4a0ec15f4d 100644 --- a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp +++ b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp @@ -42,7 +42,8 @@ public: }; TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType, - const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs) + const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs, + float diskTimeAvailableScale) : GroupType(groupType) , CostCounters(counters->GetSubgroup("subsystem", "advancedCost")) , UserDiskCost(CostCounters->GetCounter("UserDiskCost", true)) @@ -50,8 +51,10 @@ TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::E , ScrubDiskCost(CostCounters->GetCounter("ScrubDiskCost", true)) , DefragDiskCost(CostCounters->GetCounter("DefragDiskCost", true)) , InternalDiskCost(CostCounters->GetCounter("InternalDiskCost", true)) - , BucketCapacity(burstThresholdNs / GroupType.BlobSubgroupSize()) - , Bucket(&DiskTimeAvailableNs, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true) + , DiskTimeAvailableCtr(CostCounters->GetCounter("DiskTimeAvailable", false)) + , BucketCapacity(burstThresholdNs * diskTimeAvailableScale / GroupType.BlobSubgroupSize()) + , Bucket(&DiskTimeAvailable, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true) + , DiskTimeAvailableScale(diskTimeAvailableScale) { BurstDetector.Initialize(CostCounters, "BurstDetector"); switch (GroupType.GetErasure()) { diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h index c8ead3dc52b..115392cc9ab 100644 --- a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h +++ b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h @@ -317,17 +317,20 @@ private: ::NMonitoring::TDynamicCounters::TCounterPtr ScrubDiskCost; ::NMonitoring::TDynamicCounters::TCounterPtr DefragDiskCost; ::NMonitoring::TDynamicCounters::TCounterPtr InternalDiskCost; + ::NMonitoring::TDynamicCounters::TCounterPtr DiskTimeAvailableCtr; TAtomic BucketCapacity; // 10^9 nsec - TAtomic DiskTimeAvailableNs = 1'000'000'000; + TAtomic DiskTimeAvailable = 1'000'000'000; TBucketQuoter<i64, TSpinLock, TAppDataTimerMs<TInstantTimerMs>> Bucket; TLight BurstDetector; std::atomic<ui64> SeqnoBurstDetector = 0; static constexpr ui32 ConcurrentHugeRequestsAllowed = 3; + float DiskTimeAvailableScale = 1; public: TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType, - const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs); + const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs, + float diskTimeAvailableScale); template<class TEv> ui64 GetCost(const TEv& ev) const { @@ -353,7 +356,9 @@ public: } void SetTimeAvailable(ui32 diskTimeAvailableNSec) { - AtomicSet(DiskTimeAvailableNs, diskTimeAvailableNSec); + ui64 diskTimeAvailable = diskTimeAvailableNSec * DiskTimeAvailableScale; + AtomicSet(DiskTimeAvailable, diskTimeAvailable); + *DiskTimeAvailableCtr = diskTimeAvailable; } public: diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp index 32c3647fc4b..18f5e5735ae 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp @@ -121,6 +121,7 @@ namespace NKikimr { #endif BurstThresholdNs = NPDisk::DevicePerformance.at(baseInfo.DeviceType).BurstThresholdNs; + DiskTimeAvailableScale = 1; } void TVDiskConfig::SetupHugeBytes() { @@ -163,7 +164,6 @@ namespace NKikimr { UPDATE_MACRO(BarrierValidation); - UPDATE_MACRO(BurstThresholdNs); #undef UPDATE_MACRO } diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.h b/ydb/core/blobstorage/vdisk/common/vdisk_config.h index f613e48360d..a2d5bd28b19 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.h @@ -209,7 +209,10 @@ namespace NKikimr { TDuration WhiteboardUpdateInterval; bool EnableVDiskCooldownTimeout; TControlWrapper EnableVPatch = true; + + ///////////// COST METRICS SETTINGS //////////////// ui64 BurstThresholdNs = 1'000'000'000; + float DiskTimeAvailableScale = 1; ///////////// FEATURE FLAGS //////////////////////// NKikimrConfig::TFeatureFlags FeatureFlags; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp index 7068c670d48..7afc10652df 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp @@ -31,7 +31,8 @@ namespace NKikimr { TReplQuoter::TPtr replPDiskWriteQuoter, TReplQuoter::TPtr replNodeRequestQuoter, TReplQuoter::TPtr replNodeResponseQuoter, - ui64 burstThresholdNs) + ui64 burstThresholdNs, + float diskTimeAvailableScale) : TBSProxyContext(vdiskCounters->GetSubgroup("subsystem", "memhull")) , VDiskActorId(vdiskActorId) , Top(std::move(top)) @@ -58,7 +59,8 @@ namespace NKikimr { , ReplPDiskWriteQuoter(std::move(replPDiskWriteQuoter)) , ReplNodeRequestQuoter(std::move(replNodeRequestQuoter)) , ReplNodeResponseQuoter(std::move(replNodeResponseQuoter)) - , CostTracker(std::make_shared<TBsCostTracker>(Top->GType, type, vdiskCounters, burstThresholdNs)) + , CostTracker(std::make_shared<TBsCostTracker>(Top->GType, type, vdiskCounters, burstThresholdNs, + diskTimeAvailableScale)) , OutOfSpaceState(Top->GetTotalVDisksNum(), Top->GetOrderNumber(ShortSelfVDisk)) , CostMonGroup(vdiskCounters, "subsystem", "cost") , Logger(as ? ActorSystemLogger(as) : DevNullLogger()) diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.h b/ydb/core/blobstorage/vdisk/common/vdisk_context.h index 4982b1cac20..cdf099729f3 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_context.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.h @@ -101,7 +101,8 @@ namespace NKikimr { TReplQuoter::TPtr replPDiskWriteQuoter = nullptr, TReplQuoter::TPtr replNodeRequestQuoter = nullptr, TReplQuoter::TPtr replNodeResponseQuoter = nullptr, - ui64 burstThresholdNs = 1'000'000'000); + ui64 burstThresholdNs = 1'000'000'000, + float diskTimeAvailableScale = 1); // The function checks response from PDisk. Normally, it's OK. // Other alternatives are: 1) shutdown; 2) FAIL diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp index 03bed8d88fb..47b6b14f35d 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp @@ -2157,6 +2157,10 @@ namespace NKikimr { TABLED() {str << "BurstThresholdNs";} TABLED() {str << Config->BurstThresholdNs;} } + TABLER() { + TABLED() {str << "DiskTimeAvailableScale";} + TABLED() {str << Config->DiskTimeAvailableScale;} + } } } diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp index b27176250ed..d2101a87a83 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp @@ -716,7 +716,7 @@ namespace NKikimr { VCtx = MakeIntrusive<TVDiskContext>(ctx.SelfID, GInfo->PickTopology(), VDiskCounters, SelfVDiskId, ctx.ExecutorThread.ActorSystem, baseInfo.DeviceType, baseInfo.DonorMode, baseInfo.ReplPDiskReadQuoter, baseInfo.ReplPDiskWriteQuoter, baseInfo.ReplNodeRequestQuoter, - baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs); + baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs, Config->DiskTimeAvailableScale); // create IntQueues IntQueueAsyncGets = std::make_unique<TIntQueueClass>( diff --git a/ydb/core/protos/blobstorage_vdisk_config.proto b/ydb/core/protos/blobstorage_vdisk_config.proto index 1467405be4c..5b800c1cf65 100644 --- a/ydb/core/protos/blobstorage_vdisk_config.proto +++ b/ydb/core/protos/blobstorage_vdisk_config.proto @@ -22,8 +22,6 @@ message TVDiskConfig { optional bool BarrierValidation = 60; optional bool EnableOverseerLsnReporting = 61; // deprecated - - optional uint64 BurstThresholdNs = 62; }; // organizes hierarchy of VDisk configs: VDisk config may have a base config, diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index a2d2b5386ec..ce0d18baacc 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -4,6 +4,7 @@ import "ydb/core/fq/libs/config/protos/fq_config.proto"; import "ydb/core/protos/alloc.proto"; import "ydb/core/protos/auth.proto"; import "ydb/core/protos/blobstorage.proto"; +import "ydb/core/protos/blobstorage_base3.proto"; import "ydb/core/protos/blobstorage_config.proto"; import "ydb/core/protos/blobstorage_pdisk_config.proto"; import "ydb/core/protos/blobstorage_vdisk_config.proto"; @@ -280,6 +281,18 @@ message TBlobStorageConfig { } optional TAutoconfigSettings AutoconfigSettings = 6; + + message TCostMetricsConfig { + optional NKikimrBlobStorage.EPDiskType PDiskType = 1; + optional uint64 BurstThresholdNs = 2; + optional float DiskTimeAvailableScale = 3; + } + + message TCostMetricsSettings { + repeated TCostMetricsConfig VDiskTypes = 1; + }; + + optional TCostMetricsSettings CostMetricsSettings = 7; } message TBlobStorageFormatConfig { |
