summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Sergey Belyakov <[email protected]> 2024-03-07 12:00:00 +0300
committer GitHub <[email protected]> 2024-03-07 12:00:00 +0300
commit 9235b5deda77c5ac20a5876aa3d01301a95f60e9 (patch)
tree d8aed654a786d02fd41071a63c1c2c810de9642e
parent d9db5d6a97386e9f7db96f9ed1a16ac07df95cd9 (diff)
Add DiskTimeAvailable scaling option to BlobStorage configuration, improve burst threshold configuration (#2530)
-rw-r--r-- ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp 13
-rw-r--r-- ydb/core/blobstorage/ut_blobstorage/lib/env.h 28
-rw-r--r-- ydb/core/blobstorage/ut_blobstorage/monitoring.cpp 38
-rw-r--r-- ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp 9
-rw-r--r-- ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h 11
-rw-r--r-- ydb/core/blobstorage/vdisk/common/vdisk_config.cpp 2
-rw-r--r-- ydb/core/blobstorage/vdisk/common/vdisk_config.h 3
-rw-r--r-- ydb/core/blobstorage/vdisk/common/vdisk_context.cpp 6
-rw-r--r-- ydb/core/blobstorage/vdisk/common/vdisk_context.h 3
-rw-r--r-- ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp 4
-rw-r--r-- ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp 2
-rw-r--r-- ydb/core/protos/blobstorage_vdisk_config.proto 2
-rw-r--r-- ydb/core/protos/config.proto 13
13 files changed, 99 insertions, 35 deletions
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
index 781dd6070fb..eeea89001a2 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
@@ -176,6 +176,19 @@ namespace NKikimr::NStorage {
vdiskConfig->EnableVPatch = EnableVPatch;
vdiskConfig->FeatureFlags = Cfg->FeatureFlags;
+ // Apply cost-metrics overrides from the BlobStorage config. Every VDiskTypes entry whose
+ // PDiskType maps to this VDisk's device type may override BurstThresholdNs and/or
+ // DiskTimeAvailableScale; entries are applied in order, so a later matching entry wins.
+ if (Cfg->BlobStorageConfig.HasCostMetricsSettings()) {
+     // iterate by const reference: the elements are protobuf messages and are only read here
+     for (const auto& type : Cfg->BlobStorageConfig.GetCostMetricsSettings().GetVDiskTypes()) {
+         // entries without a PDiskType, or for another device type, do not apply
+         if (!type.HasPDiskType() || deviceType != PDiskTypeToPDiskType(type.GetPDiskType())) {
+             continue;
+         }
+         if (type.HasBurstThresholdNs()) {
+             vdiskConfig->BurstThresholdNs = type.GetBurstThresholdNs();
+         }
+         if (type.HasDiskTimeAvailableScale()) {
+             vdiskConfig->DiskTimeAvailableScale = type.GetDiskTimeAvailableScale();
+         }
+     }
+ }
+
// issue initial report to whiteboard before creating actor to avoid races
Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(vdiskId, groupInfo->GetStoragePoolName(),
vslotId.PDiskId, vslotId.VDiskSlotId, pdiskGuid, kind, donorMode, whiteboardInstanceGuid, std::move(donors)));
diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h
index 8be7af96c4d..c774c235035 100644
--- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h
+++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h
@@ -40,7 +40,7 @@ struct TEnvironmentSetup {
const TFeatureFlags FeatureFlags;
const NPDisk::EDeviceType DiskType = NPDisk::EDeviceType::DEVICE_TYPE_NVME;
const ui32 BurstThresholdNs = 0;
- const TString VDiskKind = "";
+ const float DiskTimeAvailableScale = 1;
};
const TSettings Settings;
@@ -325,16 +325,16 @@ struct TEnvironmentSetup {
config->CacheAccessor = std::make_unique<TAccessor>(Cache[nodeId]);
}
config->FeatureFlags = Settings.FeatureFlags;
- if (Settings.VDiskKind) {
- NKikimrBlobStorage::TAllVDiskKinds vdiskConfig;
- auto* kind = vdiskConfig.AddVDiskKinds();
- kind->SetKind(NKikimrBlobStorage::TVDiskKind::Test1);
+
+ {
+ auto* type = config->BlobStorageConfig.MutableCostMetricsSettings()->AddVDiskTypes();
+ type->SetPDiskType(NKikimrBlobStorage::EPDiskType::ROT);
if (Settings.BurstThresholdNs) {
- kind->MutableConfig()->SetBurstThresholdNs(Settings.BurstThresholdNs);
+ type->SetBurstThresholdNs(Settings.BurstThresholdNs);
}
-
- config->AllVDiskKinds = MakeIntrusive<TAllVDiskKinds>(vdiskConfig);
+ type->SetDiskTimeAvailableScale(Settings.DiskTimeAvailableScale);
}
+
warden.reset(CreateBSNodeWarden(config));
}
@@ -419,11 +419,7 @@ struct TEnvironmentSetup {
cmd2->SetName(StoragePoolName);
cmd2->SetKind(StoragePoolName);
cmd2->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure()));
- if (Settings.VDiskKind) {
- cmd2->SetVDiskKind(Settings.VDiskKind);
- } else {
- cmd2->SetVDiskKind("Default");
- }
+ cmd2->SetVDiskKind("Default");
cmd2->SetNumGroups(numGroups ? numGroups : NumGroups);
cmd2->AddPDiskFilter()->AddProperty()->SetType(pdiskType);
if (Settings.Encryption) {
@@ -443,11 +439,7 @@ struct TEnvironmentSetup {
cmd->SetName(poolName);
cmd->SetKind(poolName);
cmd->SetErasureSpecies(TBlobStorageGroupType::ErasureSpeciesName(Settings.Erasure.GetErasure()));
- if (Settings.VDiskKind) {
- cmd->SetVDiskKind(Settings.VDiskKind);
- } else {
- cmd->SetVDiskKind("Default");
- }
+ cmd->SetVDiskKind("Default");
cmd->SetNumGroups(1);
cmd->AddPDiskFilter()->AddProperty()->SetType(NKikimrBlobStorage::EPDiskType::ROT);
if (Settings.Encryption) {
diff --git a/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp b/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp
index c4e3d42f770..db60b5eef96 100644
--- a/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp
+++ b/ydb/core/blobstorage/ut_blobstorage/monitoring.cpp
@@ -34,7 +34,7 @@ ui64 AggregateVDiskCounters(std::unique_ptr<TEnvironmentSetup>& env, TString sto
void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr<TEnvironmentSetup>& env,
ui32& groupSize, TBlobStorageGroupType& groupType, ui32& groupId, std::vector<ui32>& pdiskLayout,
- ui32 burstThresholdNs = 0, TString vdiskKind = "") {
+ ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) {
groupSize = topology.TotalVDisks;
groupType = topology.GType;
env.reset(new TEnvironmentSetup({
@@ -42,7 +42,7 @@ void SetupEnv(const TBlobStorageGroupInfo::TTopology& topology, std::unique_ptr<
.Erasure = groupType,
.DiskType = NPDisk::EDeviceType::DEVICE_TYPE_ROT,
.BurstThresholdNs = burstThresholdNs,
- .VDiskKind = vdiskKind,
+ .DiskTimeAvailableScale = diskTimeAvailableScale,
}));
env->CreateBoxAndPool(1, 1);
@@ -248,7 +248,7 @@ enum class ELoadDistribution : ui8 {
template <typename TInflightActor>
void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution loadDistribution,
- ui32 burstThresholdNs = 0) {
+ ui32 burstThresholdNs = 0, float diskTimeAvailableScale = 1) {
TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true);
auto* actor = new TInflightActor({requests, inflight, delay}, 8_MB);
std::unique_ptr<TEnvironmentSetup> env;
@@ -256,7 +256,8 @@ void TestBurst(ui32 requests, ui32 inflight, TDuration delay, ELoadDistribution
TBlobStorageGroupType groupType;
ui32 groupId;
std::vector<ui32> pdiskLayout;
- SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs, "Test1");
+ SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, burstThresholdNs,
+ diskTimeAvailableScale);
actor->SetGroupId(groupId);
env->Runtime->Register(actor, 1);
@@ -286,4 +287,33 @@ Y_UNIT_TEST_SUITE(BurstDetection) {
}
}
+// Checks that the aggregated "DiskTimeAvailable" counter follows DiskTimeAvailableScale:
+// the value observed with scale=2 must equal twice the value observed with scale=1,
+// within an absolute tolerance of 10.
+void TestDiskTimeAvailableScaling() {
+    // Builds a fresh ErasureNone environment via SetupEnv with the given scale (burst
+    // threshold argument left at 0) and returns the aggregated
+    // advancedCost/DiskTimeAvailable counter value.
+    const auto timeAvailableWithScale = [](float scale) {
+        TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true);
+        std::unique_ptr<TEnvironmentSetup> env;
+        ui32 groupSize;
+        TBlobStorageGroupType groupType;
+        ui32 groupId;
+        std::vector<ui32> pdiskLayout;
+        SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, 0, scale);
+
+        const ui64 aggregated = AggregateVDiskCounters(env, env->StoragePoolName, groupSize, groupId,
+                pdiskLayout, "advancedCost", "DiskTimeAvailable");
+        return aggregated;
+    };
+
+    const i64 unscaled = timeAvailableWithScale(1);
+    const i64 doubled = timeAvailableWithScale(2);
+
+    // zero when the scale=2 measurement is exactly twice the scale=1 measurement
+    const i64 delta = 2 * unscaled - doubled;
+
+    UNIT_ASSERT_LE_C(std::abs(delta), 10, "Total time available: with scale=1 time=" << unscaled <<
+            ", with scale=2 time=" << doubled);
+}
+
+// Unit test suite for the DiskTimeAvailable cost metric; the single Scaling case
+// delegates to TestDiskTimeAvailableScaling().
+Y_UNIT_TEST_SUITE(DiskTimeAvailable) {
+ Y_UNIT_TEST(Scaling) {
+ TestDiskTimeAvailableScaling();
+ }
+}
+
#undef MAKE_BURST_TEST
diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp
index 188fd70c052..f4a0ec15f4d 100644
--- a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp
+++ b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.cpp
@@ -42,7 +42,8 @@ public:
};
TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType,
- const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs)
+ const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs,
+ float diskTimeAvailableScale)
: GroupType(groupType)
, CostCounters(counters->GetSubgroup("subsystem", "advancedCost"))
, UserDiskCost(CostCounters->GetCounter("UserDiskCost", true))
@@ -50,8 +51,10 @@ TBsCostTracker::TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::E
, ScrubDiskCost(CostCounters->GetCounter("ScrubDiskCost", true))
, DefragDiskCost(CostCounters->GetCounter("DefragDiskCost", true))
, InternalDiskCost(CostCounters->GetCounter("InternalDiskCost", true))
- , BucketCapacity(burstThresholdNs / GroupType.BlobSubgroupSize())
- , Bucket(&DiskTimeAvailableNs, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true)
+ , DiskTimeAvailableCtr(CostCounters->GetCounter("DiskTimeAvailable", false))
+ , BucketCapacity(burstThresholdNs * diskTimeAvailableScale / GroupType.BlobSubgroupSize())
+ , Bucket(&DiskTimeAvailable, &BucketCapacity, nullptr, nullptr, nullptr, nullptr, true)
+ , DiskTimeAvailableScale(diskTimeAvailableScale)
{
BurstDetector.Initialize(CostCounters, "BurstDetector");
switch (GroupType.GetErasure()) {
diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h
index c8ead3dc52b..115392cc9ab 100644
--- a/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h
+++ b/ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h
@@ -317,17 +317,20 @@ private:
::NMonitoring::TDynamicCounters::TCounterPtr ScrubDiskCost;
::NMonitoring::TDynamicCounters::TCounterPtr DefragDiskCost;
::NMonitoring::TDynamicCounters::TCounterPtr InternalDiskCost;
+ ::NMonitoring::TDynamicCounters::TCounterPtr DiskTimeAvailableCtr;
TAtomic BucketCapacity; // 10^9 nsec
- TAtomic DiskTimeAvailableNs = 1'000'000'000;
+ TAtomic DiskTimeAvailable = 1'000'000'000;
TBucketQuoter<i64, TSpinLock, TAppDataTimerMs<TInstantTimerMs>> Bucket;
TLight BurstDetector;
std::atomic<ui64> SeqnoBurstDetector = 0;
static constexpr ui32 ConcurrentHugeRequestsAllowed = 3;
+ float DiskTimeAvailableScale = 1;
public:
TBsCostTracker(const TBlobStorageGroupType& groupType, NPDisk::EDeviceType diskType,
- const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs);
+ const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, ui64 burstThresholdNs,
+ float diskTimeAvailableScale);
template<class TEv>
ui64 GetCost(const TEv& ev) const {
@@ -353,7 +356,9 @@ public:
}
+// Publishes a new disk time budget: the raw value is multiplied by DiskTimeAvailableScale,
+// stored into the DiskTimeAvailable atomic (the one Bucket is constructed with) and mirrored
+// into the DiskTimeAvailable monitoring counter.
void SetTimeAvailable(ui32 diskTimeAvailableNSec) {
- AtomicSet(DiskTimeAvailableNs, diskTimeAvailableNSec);
+     // Multiply in double: a float product keeps only 24 significant bits, which rounds
+     // values around 10^9 to multiples of 64 before they are stored.
+     const double scaled = static_cast<double>(diskTimeAvailableNSec) * DiskTimeAvailableScale;
+     // Converting a negative or NaN value to an unsigned integer is undefined behaviour;
+     // treat any non-positive result as "no time available".
+     const ui64 diskTimeAvailable = scaled > 0 ? static_cast<ui64>(scaled) : 0;
+     AtomicSet(DiskTimeAvailable, diskTimeAvailable);
+     *DiskTimeAvailableCtr = diskTimeAvailable;
}
public:
diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp
index 32c3647fc4b..18f5e5735ae 100644
--- a/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp
+++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.cpp
@@ -121,6 +121,7 @@ namespace NKikimr {
#endif
BurstThresholdNs = NPDisk::DevicePerformance.at(baseInfo.DeviceType).BurstThresholdNs;
+ DiskTimeAvailableScale = 1;
}
void TVDiskConfig::SetupHugeBytes() {
@@ -163,7 +164,6 @@ namespace NKikimr {
UPDATE_MACRO(BarrierValidation);
- UPDATE_MACRO(BurstThresholdNs);
#undef UPDATE_MACRO
}
diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.h b/ydb/core/blobstorage/vdisk/common/vdisk_config.h
index f613e48360d..a2d5bd28b19 100644
--- a/ydb/core/blobstorage/vdisk/common/vdisk_config.h
+++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.h
@@ -209,7 +209,10 @@ namespace NKikimr {
TDuration WhiteboardUpdateInterval;
bool EnableVDiskCooldownTimeout;
TControlWrapper EnableVPatch = true;
+
+ ///////////// COST METRICS SETTINGS ////////////////
ui64 BurstThresholdNs = 1'000'000'000;
+ float DiskTimeAvailableScale = 1;
///////////// FEATURE FLAGS ////////////////////////
NKikimrConfig::TFeatureFlags FeatureFlags;
diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp
index 7068c670d48..7afc10652df 100644
--- a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp
+++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp
@@ -31,7 +31,8 @@ namespace NKikimr {
TReplQuoter::TPtr replPDiskWriteQuoter,
TReplQuoter::TPtr replNodeRequestQuoter,
TReplQuoter::TPtr replNodeResponseQuoter,
- ui64 burstThresholdNs)
+ ui64 burstThresholdNs,
+ float diskTimeAvailableScale)
: TBSProxyContext(vdiskCounters->GetSubgroup("subsystem", "memhull"))
, VDiskActorId(vdiskActorId)
, Top(std::move(top))
@@ -58,7 +59,8 @@ namespace NKikimr {
, ReplPDiskWriteQuoter(std::move(replPDiskWriteQuoter))
, ReplNodeRequestQuoter(std::move(replNodeRequestQuoter))
, ReplNodeResponseQuoter(std::move(replNodeResponseQuoter))
- , CostTracker(std::make_shared<TBsCostTracker>(Top->GType, type, vdiskCounters, burstThresholdNs))
+ , CostTracker(std::make_shared<TBsCostTracker>(Top->GType, type, vdiskCounters, burstThresholdNs,
+ diskTimeAvailableScale))
, OutOfSpaceState(Top->GetTotalVDisksNum(), Top->GetOrderNumber(ShortSelfVDisk))
, CostMonGroup(vdiskCounters, "subsystem", "cost")
, Logger(as ? ActorSystemLogger(as) : DevNullLogger())
diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.h b/ydb/core/blobstorage/vdisk/common/vdisk_context.h
index 4982b1cac20..cdf099729f3 100644
--- a/ydb/core/blobstorage/vdisk/common/vdisk_context.h
+++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.h
@@ -101,7 +101,8 @@ namespace NKikimr {
TReplQuoter::TPtr replPDiskWriteQuoter = nullptr,
TReplQuoter::TPtr replNodeRequestQuoter = nullptr,
TReplQuoter::TPtr replNodeResponseQuoter = nullptr,
- ui64 burstThresholdNs = 1'000'000'000);
+ ui64 burstThresholdNs = 1'000'000'000,
+ float diskTimeAvailableScale = 1);
// The function checks response from PDisk. Normally, it's OK.
// Other alternatives are: 1) shutdown; 2) FAIL
diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp
index 03bed8d88fb..47b6b14f35d 100644
--- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp
+++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp
@@ -2157,6 +2157,10 @@ namespace NKikimr {
TABLED() {str << "BurstThresholdNs";}
TABLED() {str << Config->BurstThresholdNs;}
}
+ TABLER() {
+ TABLED() {str << "DiskTimeAvailableScale";}
+ TABLED() {str << Config->DiskTimeAvailableScale;}
+ }
}
}
diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp
index b27176250ed..d2101a87a83 100644
--- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp
+++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp
@@ -716,7 +716,7 @@ namespace NKikimr {
VCtx = MakeIntrusive<TVDiskContext>(ctx.SelfID, GInfo->PickTopology(), VDiskCounters, SelfVDiskId,
ctx.ExecutorThread.ActorSystem, baseInfo.DeviceType, baseInfo.DonorMode,
baseInfo.ReplPDiskReadQuoter, baseInfo.ReplPDiskWriteQuoter, baseInfo.ReplNodeRequestQuoter,
- baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs);
+ baseInfo.ReplNodeResponseQuoter, Config->BurstThresholdNs, Config->DiskTimeAvailableScale);
// create IntQueues
IntQueueAsyncGets = std::make_unique<TIntQueueClass>(
diff --git a/ydb/core/protos/blobstorage_vdisk_config.proto b/ydb/core/protos/blobstorage_vdisk_config.proto
index 1467405be4c..5b800c1cf65 100644
--- a/ydb/core/protos/blobstorage_vdisk_config.proto
+++ b/ydb/core/protos/blobstorage_vdisk_config.proto
@@ -22,8 +22,6 @@ message TVDiskConfig {
optional bool BarrierValidation = 60;
optional bool EnableOverseerLsnReporting = 61; // deprecated
-
- optional uint64 BurstThresholdNs = 62;
};
// organizes hierarchy of VDisk configs: VDisk config may have a base config,
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index a2d2b5386ec..ce0d18baacc 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -4,6 +4,7 @@ import "ydb/core/fq/libs/config/protos/fq_config.proto";
import "ydb/core/protos/alloc.proto";
import "ydb/core/protos/auth.proto";
import "ydb/core/protos/blobstorage.proto";
+import "ydb/core/protos/blobstorage_base3.proto";
import "ydb/core/protos/blobstorage_config.proto";
import "ydb/core/protos/blobstorage_pdisk_config.proto";
import "ydb/core/protos/blobstorage_vdisk_config.proto";
@@ -280,6 +281,18 @@ message TBlobStorageConfig {
}
optional TAutoconfigSettings AutoconfigSettings = 6;
+
+ // Cost metrics overrides for VDisks that run on one PDisk device type.
+ message TCostMetricsConfig {
+     // device type this entry applies to; node warden skips entries that do not set it
+     optional NKikimrBlobStorage.EPDiskType PDiskType = 1;
+     // when set, replaces the VDisk config BurstThresholdNs value
+     optional uint64 BurstThresholdNs = 2;
+     // when set, replaces the VDisk config DiskTimeAvailableScale (multiplier, VDisk default is 1)
+     optional float DiskTimeAvailableScale = 3;
+ }
+
+ // List of per-device-type cost metrics overrides.
+ message TCostMetricsSettings {
+     repeated TCostMetricsConfig VDiskTypes = 1;
+ }
+
+ optional TCostMetricsSettings CostMetricsSettings = 7;
}
message TBlobStorageFormatConfig {