diff options
author | Iuliia Sidorina <yulia@ydb.tech> | 2025-03-21 15:24:50 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-21 15:24:50 +0100 |
commit | a95ce0d66fef8de308eaeb8c9e90bfe6fa03cca8 (patch) | |
tree | 1faec2037af73ed8a4de2d7c993e07c89e3c0318 | |
parent | 22e147293016ec53d2a9f043afdbd6b2601aa3da (diff) | |
download | ydb-a95ce0d66fef8de308eaeb8c9e90bfe6fa03cca8.tar.gz |
log(schemeshard): fix message for split/merge logs (#15909)
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard__table_stats.cpp | 2 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_info_types.cpp | 49 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_info_types.h | 16 |
3 files changed, 38 insertions, 29 deletions
diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index c51db56120..3c665d18ed 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -425,7 +425,7 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, TVector<TShardIdx> shardsToMerge; TString mergeReason; if ((!index || index->State == NKikimrSchemeOp::EIndexStateReady) - && table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex, mergeReason) + && table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, Self->ShardInfos[shardIdx].TabletID, shardsToMerge, mainTableForIndex, mergeReason) ) { TTxId txId = Self->GetCachedTxId(ctx); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 6741d643e5..2217042217 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -1873,7 +1873,8 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad, - const TTableInfo* mainTableForIndex, TString& reason) const + float cpuUsageThreshold, const TTableInfo* mainTableForIndex, + TString& reason) const { if (ExpectedPartitionCount + 1 - shardsToMerge.size() <= GetMinPartitionsCount()) { return false; @@ -1907,9 +1908,7 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, const auto sizeToMerge = GetSizeToMerge(forceShardSplitSettings); if (IsMergeBySizeEnabled(forceShardSplitSettings) && stats->DataSize + totalSize <= sizeToMerge) { reason = TStringBuilder() << "merge by size (" - << "dataSize: " << stats->DataSize << ", " - << "totalSize: " << stats->DataSize + totalSize << ", " - << "sizeToMerge: " << sizeToMerge << ")"; + << "shardSize: " << stats->DataSize << ")"; canMerge = true; } @@ -1932,21 +1931,15 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, // Check that total load doesn't exceed the limits float shardLoad = stats->GetCurrentRawCpuUsage() * 0.000001; if (IsMergeByLoadEnabled(mainTableForIndex)) { - const auto settings = GetEffectiveSplitByLoadSettings(mainTableForIndex); - i64 cpuPercentage = settings.GetCpuPercentageThreshold(); - float cpuUsageThreshold = 0.01 * (cpuPercentage ? cpuPercentage : (i64)splitSettings.FastSplitCpuPercentageThreshold); - // Calculate shard load based on historical data TDuration loadDuration = TDuration::Seconds(splitSettings.MergeByLoadMinLowLoadDurationSec); shardLoad = 0.01 * stats->GetLatestMaxCpuUsagePercent(now - loadDuration); - if (shardLoad + totalLoad > cpuUsageThreshold *0.7) + if (shardLoad + totalLoad > cpuUsageThreshold) return false; reason = TStringBuilder() << "merge by load (" - << "shardLoad: " << shardLoad << ", " - << "totalLoad: " << shardLoad + totalLoad << ", " - << "loadThreshold: " << cpuUsageThreshold * 0.7 << ")"; + << "shardLoad: " << shardLoad << ")"; } // Merged shards must not have borrowed parts from the same original tablet @@ -1966,8 +1959,9 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, - TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, - const TTableInfo* mainTableForIndex, TString& reason) const + TShardIdx shardIdx, const TTabletId& tabletId, + TVector<TShardIdx>& shardsToMerge, const TTableInfo* mainTableForIndex, + TString& reason) const { // Don't split/merge backup tables if (IsBackup) { @@ -1997,16 +1991,24 @@ bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, shardsToMerge.clear(); ui64 totalSize = 0; float totalLoad = 0; + const auto settings = GetEffectiveSplitByLoadSettings(mainTableForIndex); + const i64 cpuPercentageThreshold = settings.GetCpuPercentageThreshold(); + const float cpuUsageThreshold = 0.01 * (cpuPercentageThreshold ? cpuPercentageThreshold : (i64)splitSettings.FastSplitCpuPercentageThreshold); + const float cpuMergeThreshold = 0.7 * cpuUsageThreshold; + THashSet<TTabletId> partOwners; + TString shardMergeReason; // Make sure we can actually merge current shard first - if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex, reason)) { + if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, partOwners, totalSize, totalLoad, cpuMergeThreshold, mainTableForIndex, shardMergeReason)) { return false; } - TString mergeReason; + reason = TStringBuilder() << "shard with tabletId: " << tabletId + << " " << shardMergeReason; + for (i64 pi = partitionIdx - 1; pi >= 0; --pi) { - if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex, mergeReason)) { + if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, cpuMergeThreshold, mainTableForIndex, shardMergeReason)) { break; } } @@ -2014,11 +2016,18 @@ bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, Reverse(shardsToMerge.begin(), shardsToMerge.end()); for (ui64 pi = partitionIdx + 1; pi < GetPartitions().size(); ++pi) { - if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex, mergeReason)) { + if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, cpuMergeThreshold, mainTableForIndex, shardMergeReason)) { break; } } + reason += TStringBuilder() + << ", shardToMergeCount: " << shardsToMerge.size() + << ", totalSize: " << totalSize + << ", sizeToMerge: " << GetSizeToMerge(forceShardSplitSettings) + << ", totalLoad: " << totalLoad + << ", loadThreshold: " << cpuMergeThreshold; + return shardsToMerge.size() > 1; } @@ -2082,8 +2091,8 @@ bool TTableInfo::CheckSplitByLoad( reason = TStringBuilder() << "split by load (" << "rowCount: " << rowCount << ", " << "minRowCount: " << MIN_ROWS_FOR_SPLIT_BY_LOAD << ", " - << "dataSize: " << dataSize << ", " - << "minDataSize: " << MIN_SIZE_FOR_SPLIT_BY_LOAD << ", " + << "shardSize: " << dataSize << ", " + << "minShardSize: " << MIN_SIZE_FOR_SPLIT_BY_LOAD << ", " << "shardCount: " << Stats.PartitionStats.size() << ", " << "maxShardCount: " << maxShards << ", " << "cpuUsage: " << stats.GetCurrentRawCpuUsage() << ", " diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 971c1aa048..c29ea5711e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -685,11 +685,11 @@ public: const TForceShardSplitSettings& forceShardSplitSettings, TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad, - const TTableInfo* mainTableForIndex, TString& reason) const; + float cpuUsageThreshold, const TTableInfo* mainTableForIndex, TString& reason) const; bool CheckCanMergePartitions(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, - TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, + TShardIdx shardIdx, const TTabletId& tabletId, TVector<TShardIdx>& shardsToMerge, const TTableInfo* mainTableForIndex, TString& reason) const; bool CheckSplitByLoad( @@ -825,18 +825,18 @@ public: // When shard is over the maximum size we split even when over max partitions if (dataSize >= params.ForceShardSplitDataSize && !params.DisableForceShardSplit) { reason = TStringBuilder() << "force split by size (" - << "dataSize: " << dataSize << ", " - << "maxDataSize: " << params.ForceShardSplitDataSize << ")"; + << "shardSize: " << dataSize << ", " + << "maxShardSize: " << params.ForceShardSplitDataSize << ")"; return true; } // Otherwise we split when we may add one more partition if (Partitions.size() < GetMaxPartitionsCount() && dataSize >= GetShardSizeToSplit(params)) { reason = TStringBuilder() << "split by size (" - << "partitionCount: " << Partitions.size() << ", " - << "maxPartitionCount: " << GetMaxPartitionsCount() << ", " - << "dataSize: " << dataSize << ", " - << "maxDataSize: " << GetShardSizeToSplit(params) << ")"; + << "shardCount: " << Partitions.size() << ", " + << "maxShardCount: " << GetMaxPartitionsCount() << ", " + << "shardSize: " << dataSize << ", " + << "maxShardSize: " << GetShardSizeToSplit(params) << ")"; return true; } |