diff options
author | Ilnaz Nizametdinov <i.nizametdinov@gmail.com> | 2022-04-19 15:43:24 +0300 |
---|---|---|
committer | Ilnaz Nizametdinov <i.nizametdinov@gmail.com> | 2022-04-19 15:43:24 +0300 |
commit | 3f1e64f30ae0cfb98ea444db17be12bcf351ad51 (patch) | |
tree | 7416bb754f7a14ad794806a19a37e81489216ef3 | |
parent | bc9f7f593c157d11a53c69c7f544ea040693f643 (diff) | |
download | ydb-3f1e64f30ae0cfb98ea444db17be12bcf351ad51.tar.gz |
Reworked TTL's metrics KIKIMR-13355
ref:abd8e4b7122fd4047664b72b18301d6524cefa24
9 files changed, 198 insertions, 33 deletions
diff --git a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp index d804955b69b..f4f5db4ba75 100644 --- a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp @@ -416,8 +416,11 @@ struct TSchemeShard::TTxScheduleConditionalErase : public TTransactionBase<TSche const auto& tableShardInfo = partitions.at(partitionIdx); const auto& lag = tableShardInfo.LastCondEraseLag; + if (lag) { Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].DecrementFor(lag->Seconds()); + } else { + Y_VERIFY_DEBUG(false); } const auto now = ctx.Now(); diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 2cc91b5366d..af7b3537784 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -2036,7 +2036,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { partitions[id] = TTableShardInfo(datashardIdx, rangeEnd, lastCondErase, nextCondErase); - if (Self->TTLEnabledTables.contains(tableId) && partitions[id].LastCondErase) { + if (Self->TTLEnabledTables.contains(tableId)) { auto& lag = partitions[id].LastCondEraseLag; if (now >= partitions[id].LastCondErase) { lag = now - partitions[id].LastCondErase; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp index 849a111d91c..1774dad9d20 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp @@ -343,15 +343,25 @@ public: if (table->IsTTLEnabled() && ttlIt == context.SS->TTLEnabledTables.end()) { context.SS->TTLEnabledTables[pathId] = table; context.SS->TabletCounters->Simple()[COUNTER_TTL_ENABLED_TABLE_COUNT].Add(1); + + const auto now = context.Ctx.Now(); + for (auto& shard : table->GetPartitions()) { + auto& lag = shard.LastCondEraseLag; + Y_VERIFY_DEBUG(!lag.Defined()); + + lag = now - shard.LastCondErase; + context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); + } } else if (!table->IsTTLEnabled() && ttlIt != context.SS->TTLEnabledTables.end()) { context.SS->TTLEnabledTables.erase(ttlIt); context.SS->TabletCounters->Simple()[COUNTER_TTL_ENABLED_TABLE_COUNT].Sub(1); - for (ui32 i = 0; i < table->GetPartitions().size(); ++i) { - auto& shardInfo = table->GetPartitions().at(i); - if (auto& lag = shardInfo.LastCondEraseLag) { + for (auto& shard : table->GetPartitions()) { + if (auto& lag = shard.LastCondEraseLag) { context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].DecrementFor(lag->Seconds()); lag.Clear(); + } else { + Y_VERIFY_DEBUG(false); } } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index f65f7c6f2c6..38a15b1e315 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -193,9 +193,18 @@ public: context.SS->TabletCounters->Simple()[COUNTER_TABLE_COUNT].Add(1); - if (table->IsTTLEnabled()) { + if (table->IsTTLEnabled() && !context.SS->TTLEnabledTables.contains(pathId)) { context.SS->TTLEnabledTables[pathId] = table; context.SS->TabletCounters->Simple()[COUNTER_TTL_ENABLED_TABLE_COUNT].Add(1); + + const auto now = context.Ctx.Now(); + for (auto& shard : table->GetPartitions()) { + auto& lag = shard.LastCondEraseLag; + Y_VERIFY_DEBUG(!lag.Defined()); + + lag = now - shard.LastCondErase; + context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); + } } auto parentDir = context.SS->PathsById.at(path->ParentPathId); // TargetPathId has been created diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp index 50045225298..5d3a2361942 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp @@ -273,9 +273,18 @@ public: Y_VERIFY(table); table->AlterVersion = NEW_TABLE_ALTER_VERSION; - if (table->IsTTLEnabled()) { + if (table->IsTTLEnabled() && !context.SS->TTLEnabledTables.contains(pathId)) { context.SS->TTLEnabledTables[pathId] = table; context.SS->TabletCounters->Simple()[COUNTER_TTL_ENABLED_TABLE_COUNT].Add(1); + + const auto now = context.Ctx.Now(); + for (auto& shard : table->GetPartitions()) { + auto& lag = shard.LastCondEraseLag; + Y_VERIFY_DEBUG(!lag.Defined()); + + lag = now - shard.LastCondErase; + context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); + } } context.SS->PersistTableCreated(db, pathId); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_split_merge.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_split_merge.cpp index 645d7f37e19..92f91a09182 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_split_merge.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_split_merge.cpp @@ -248,11 +248,20 @@ public: if (txShard.Operation == TTxState::TransferData) allSrcShardIdxs.insert(txShard.Idx); } + bool dstAdded = false; + const auto now = context.Ctx.Now(); for (const auto& shard : tableInfo->GetPartitions()) { if (allSrcShardIdxs.contains(shard.ShardIdx)) { - if (dstAdded) + if (auto& lag = shard.LastCondEraseLag) { + context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].DecrementFor(lag->Seconds()); + lag.Clear(); + } + + if (dstAdded) { continue; + } + for (const auto& txShard : txState->Shards) { if (txShard.Operation != TTxState::CreateParts) continue; @@ -260,8 +269,18 @@ public: // TODO: make sure dst are sorted by range end Y_VERIFY(context.SS->ShardInfos.contains(txShard.Idx)); TTableShardInfo dst(txShard.Idx, txShard.RangeEnd); + + if (tableInfo->IsTTLEnabled()) { + auto& lag = dst.LastCondEraseLag; + Y_VERIFY_DEBUG(!lag.Defined()); + + lag = now - dst.LastCondErase; + context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); + } + newPartitioning.push_back(dst); } + dstAdded = true; } else { newPartitioning.push_back(shard); diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index 4ade8158a3a..21007ff77da 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -239,18 +239,18 @@ bool TTxStorePartitionStats::Execute(TTransactionContext& txc, const TActorConte if (lag) { Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].DecrementFor(lag->Seconds()); + } else { + Y_VERIFY_DEBUG(false); } - if (shardInfo.LastCondErase) { - const auto now = ctx.Now(); - if (now >= shardInfo.LastCondErase) { - lag = now - shardInfo.LastCondErase; - } else { - lag = TDuration::Zero(); - } - - Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); + const auto now = ctx.Now(); + if (now >= shardInfo.LastCondErase) { + lag = now - shardInfo.LastCondErase; + } else { + lag = TDuration::Zero(); } + + Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); } TVector<TShardIdx> shardsToMerge; diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index 8a4e9a37b16..d89b7b94cff 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -3213,8 +3213,9 @@ void TSchemeShard::PersistRemoveTable(NIceDb::TNiceDb& db, TPathId pathId, const db.Table<Schema::TablePartitionStats>().Key(pathId.OwnerId, pathId.LocalPathId, pNo).Delete(); const auto& shardInfo = tableInfo->GetPartitions().at(pNo); - if (const auto& lag = shardInfo.LastCondEraseLag) { + if (auto& lag = shardInfo.LastCondEraseLag) { TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].DecrementFor(lag->Seconds()); + lag.Clear(); } } diff --git a/ydb/core/tx/schemeshard/ut_ttl.cpp b/ydb/core/tx/schemeshard/ut_ttl.cpp index ce7e9c9cbfd..cafa7bfcb32 100644 --- a/ydb/core/tx/schemeshard/ut_ttl.cpp +++ b/ydb/core/tx/schemeshard/ut_ttl.cpp @@ -849,6 +849,10 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { return 0; // unreachable } + void CheckSimpleCounter(TTestBasicRuntime& runtime, const TString& name, ui64 value) { + UNIT_ASSERT_VALUES_EQUAL(value, GetSimpleCounter(runtime, name)); + } + ui64 GetPercentileCounter(TTestBasicRuntime& runtime, const TString& name, const TString& range) { const auto counters = GetCounters(runtime); for (const auto& counter : counters.GetTabletCounters().GetAppCounters().GetPercentileCounters()) { @@ -872,6 +876,16 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { return 0; // unreachable } + void CheckPercentileCounter(TTestBasicRuntime& runtime, const TString& name, const THashMap<TString, ui64>& rangeValues) { + for (const auto& [range, value] : rangeValues) { + const auto v = GetPercentileCounter(runtime, name, range); + UNIT_ASSERT_VALUES_EQUAL_C(v, value, "Unexpected value in range" + << ": range# " << range + << ", expected# " << value + << ", got# " << v); + } + } + void WaitForStats(TTestActorRuntimeBase& runtime, ui32 count) { TDispatchOptions opts; opts.FinalEvents.emplace_back(TEvDataShard::EvPeriodicTableStats, count); @@ -883,6 +897,10 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { TTestEnv env(runtime); ui64 txId = 100; + runtime.UpdateCurrentTime(TInstant::Now()); + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 0); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 0}, {"900", 0}, {"inf", 0}}); + // create TestCreateTable(runtime, ++txId, "/MyRoot", R"( Name: "TTLEnabledTable" @@ -896,31 +914,36 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { } )"); env.TestWaitNotification(runtime, txId); - UNIT_ASSERT_VALUES_EQUAL(1, GetSimpleCounter(runtime, "SchemeShard/TTLEnabledTables")); - // check lag + // just after create + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 1); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 0}, {"900", 0}, {"inf", 1}}); + + // after erase WaitForCondErase(runtime); WaitForStats(runtime, 1); - UNIT_ASSERT_VALUES_EQUAL(1, GetPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", "0")); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 1}, {"900", 0}, {"inf", 0}}); - // check lag + // after a little more time runtime.AdvanceCurrentTime(TDuration::Minutes(20)); WaitForStats(runtime, 1); - UNIT_ASSERT_VALUES_EQUAL(0, GetPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", "0")); - UNIT_ASSERT_VALUES_EQUAL(1, GetPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", "900")); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 0}, {"900", 1}, {"inf", 0}}); - // check copy table + // copy table TestCopyTable(runtime, ++txId, "/MyRoot", "TTLEnabledTableCopy", "/MyRoot/TTLEnabledTable"); env.TestWaitNotification(runtime, txId); - UNIT_ASSERT_VALUES_EQUAL(2, GetSimpleCounter(runtime, "SchemeShard/TTLEnabledTables")); - // check lag + // just after copy + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 2); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 0}, {"900", 2}, {"inf", 0}}); + + // after erase runtime.AdvanceCurrentTime(TDuration::Hours(1)); WaitForCondErase(runtime); WaitForStats(runtime, 2); - UNIT_ASSERT_VALUES_EQUAL(2, GetPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", "0")); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 2}, {"900", 0}, {"inf", 0}}); - // check alter (disable ttl) + // alter (disable ttl) TestAlterTable(runtime, ++txId, "/MyRoot", R"( Name: "TTLEnabledTable" TTLSettings { @@ -929,14 +952,20 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { } )"); env.TestWaitNotification(runtime, txId); - UNIT_ASSERT_VALUES_EQUAL(1, GetSimpleCounter(runtime, "SchemeShard/TTLEnabledTables")); - // check drop + // just after alter + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 1); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 1}, {"900", 0}, {"inf", 0}}); + + // drop TestDropTable(runtime, ++txId, "/MyRoot", "TTLEnabledTableCopy"); env.TestWaitNotification(runtime, txId); - UNIT_ASSERT_VALUES_EQUAL(0, GetSimpleCounter(runtime, "SchemeShard/TTLEnabledTables")); - // check alter (enable ttl) + // just after drop + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 0); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 0}, {"900", 0}, {"inf", 0}}); + + // alter (enable ttl) TestAlterTable(runtime, ++txId, "/MyRoot", R"( Name: "TTLEnabledTable" TTLSettings { @@ -946,7 +975,32 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { } )"); env.TestWaitNotification(runtime, txId); - UNIT_ASSERT_VALUES_EQUAL(1, GetSimpleCounter(runtime, "SchemeShard/TTLEnabledTables")); + + // just after alter + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 1); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 1}, {"900", 0}, {"inf", 0}}); + + // after a little more time + runtime.AdvanceCurrentTime(TDuration::Minutes(20)); + WaitForStats(runtime, 1); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 0}, {"900", 1}, {"inf", 0}}); + + // split + TestSplitTable(runtime, ++txId, "/MyRoot/TTLEnabledTable", Sprintf(R"( + SourceTabletId: %lu + SplitBoundary { + KeyPrefix { + Tuple { Optional { Uint64: 100 } } + } + } + )", TTestTxConfig::FakeHiveTablets)); + env.TestWaitNotification(runtime, txId); + + // after erase + runtime.AdvanceCurrentTime(TDuration::Hours(1)); + WaitForCondErase(runtime); + WaitForStats(runtime, 2); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 2}, {"900", 0}, {"inf", 0}}); } } @@ -1006,4 +1060,64 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTestsWithReboots) { } }); } + + Y_UNIT_TEST(CopyTable) { + TTestWithReboots t; + t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + { + TInactiveZone inactive(activeZone); + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "TTLEnabledTable" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "modified_at" Type: "Timestamp" } + KeyColumnNames: ["key"] + TTLSettings { + Enabled { + ColumnName: "modified_at" + ExpireAfterSeconds: 3600 + } + } + )"); + t.TestEnv->TestWaitNotification(runtime, t.TxId); + } + + TestCopyTable(runtime, ++t.TxId, "/MyRoot", "TTLEnabledTableCopy", "/MyRoot/TTLEnabledTable"); + t.TestEnv->TestWaitNotification(runtime, t.TxId); + + { + TInactiveZone inactive(activeZone); + CheckTTLSettings(runtime, "TTLEnabledTableCopy"); + } + }); + } + + Y_UNIT_TEST(MoveTable) { + TTestWithReboots t; + t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + { + TInactiveZone inactive(activeZone); + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "TTLEnabledTable" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "modified_at" Type: "Timestamp" } + KeyColumnNames: ["key"] + TTLSettings { + Enabled { + ColumnName: "modified_at" + ExpireAfterSeconds: 3600 + } + } + )"); + t.TestEnv->TestWaitNotification(runtime, t.TxId); + } + + TestMoveTable(runtime, ++t.TxId, "/MyRoot/TTLEnabledTable", "/MyRoot/TTLEnabledTableMoved"); + t.TestEnv->TestWaitNotification(runtime, t.TxId); + + { + TInactiveZone inactive(activeZone); + CheckTTLSettings(runtime, "TTLEnabledTableMoved"); + } + }); + } } |