author      ijon <ijon@yandex-team.com>    2022-12-19 14:28:33 +0300
committer   ijon <ijon@yandex-team.com>    2022-12-19 14:28:33 +0300
commit      beb9d5aca2fb16e6aea1f2fd6a2028de50c3799e (patch)
tree        282f4cdab6917b1795661babc6385e0d9747d40d
parent      2865b7518d18d8a476256c5e825ac65e5c3c4b73 (diff)
download    ydb-beb9d5aca2fb16e6aea1f2fd6a2028de50c3799e.tar.gz
schemeshard, drop-extsubdomain: allow nodes to reconnect during operation
Allow nodes to reconnect to the database during the drop-extsubdomain operation
by fixing the moment at which the database path is considered deleted.

The database root path is marked as _being deleted_ at the start of the operation,
but it was also marked as actually _deleted_ at that same point. That is too early:
a path carrying the _deleted_ mark becomes unresolvable to any external observer,
such as a compute node trying to reconnect to the database.
In the case of a dedicated database, whose hive tablet has no place to run other
than the dedicated database nodes, losing all database nodes would leave the
drop-extsubdomain operation with no chance to ever complete: the operation uses
the hive to stop all database tablets, but the hive has nowhere to run once all
database nodes have disconnected and none of them is able to reconnect.

Databases that host serverless databases suffer from the same problem;
they are just much less likely to be removed.
10 files changed, 130 insertions, 47 deletions
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp
index 446b70083e3..9a2ca79ce23 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp
@@ -615,15 +615,15 @@ void NForceDrop::CollectShards(const THashSet<TPathId>& pathes, TOperationId ope
 }

 void NForceDrop::ValidateNoTransactionOnPathes(TOperationId operationId, const THashSet<TPathId>& pathes, TOperationContext &context) {
-    // it is not supposed that someone transaction is able to materialise in dropping subdomain
-    // all transaction should check parent dir status
-    // however, it is better to check that all locks are ours
+    // No transaction should materialize in a subdomain that is being deleted --
+    // -- all operations should be checking parent dir status at Propose stage.
+    // However, it is better to verify that, just in case.
     auto transactions = context.SS->GetRelatedTransactions(pathes, context.Ctx);
     for (auto otherTxId: transactions) {
         if (otherTxId == operationId.GetTxId()) {
             continue;
         }
-        Y_VERIFY_S(false, "transaction: " << otherTxId << " found on deleted subdomain");
+        Y_VERIFY_S(false, "unexpected transaction: " << otherTxId << " found on the subdomain being deleted by transaction " << operationId.GetTxId());
     }
 }
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_extsubdomain.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_extsubdomain.cpp
index c06f992d5f0..98e6de0fc2c 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_extsubdomain.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_extsubdomain.cpp
@@ -66,9 +66,23 @@ public:
         IgnoreMessages(DebugHint(), toIgnore);
     }

+    void FinishState(TTxState* txState, TOperationContext& context) {
+        auto targetPath = context.SS->PathsById.at(txState->TargetPathId);
+
+        NIceDb::TNiceDb db(context.GetDB());
+
+        // We are done with the extsubdomain's tablets, now its a good time
+        // to make extsubdomain root unresolvable to any external observer
+        context.SS->DropNode(targetPath, txState->PlanStep, OperationId.GetTxId(), db, context.Ctx);
+
+        context.OnComplete.PublishToSchemeBoard(OperationId, targetPath->PathId);
+
+        context.SS->ChangeTxState(db, OperationId, TTxState::DeletePrivateShards);
+    }
+
     bool HandleReply(TEvHive::TEvDeleteOwnerTabletsReply::TPtr& ev, TOperationContext& context) override {
         TTabletId ssId = context.SS->SelfTabletId();
-        NKikimrHive::TEvDeleteOwnerTabletsReply record = ev->Get()->Record;
+        NKikimrHive::TEvDeleteOwnerTabletsReply record = ev->Get()->Record;

         LOG_INFO_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
                    DebugHint() << " HandleReply TDeleteExternalShards"
@@ -95,8 +109,7 @@ public:
         TTabletId hive = TTabletId(record.GetOrigin());
         context.OnComplete.UnbindMsgFromPipe(OperationId, hive, TPipeMessageId(0, 0));

-        NIceDb::TNiceDb db(context.GetDB());
-        context.SS->ChangeTxState(db, OperationId, TTxState::DeletePrivateShards);
+        FinishState(txState, context);

         return true;
     }
@@ -118,9 +131,10 @@ public:

         TTabletId tenantSchemeshard = domainInfo->GetTenantSchemeShardID();

-        if (!tenantSchemeshard) { // ext_subdomain was't altered at all, and there has't been added tenantSchemeshard
-            NIceDb::TNiceDb db(context.GetDB());
-            context.SS->ChangeTxState(db, OperationId, TTxState::DeletePrivateShards);
+        if (!tenantSchemeshard) {
+            // extsubdomain was't altered at all, there are no tenantSchemeshard,
+            // nothing to do
+            FinishState(txState, context);
             return true;
         }
@@ -169,7 +183,19 @@ public:
         NIceDb::TNiceDb db(context.GetDB());

+        txState->PlanStep = step;
+        context.SS->PersistTxPlanStep(db, OperationId, step);
+
+        //NOTE: drop entire extsubdomain path tree except the extsubdomain root itself,
+        // root should stay alive (but marked for deletion) until operation is done.
+        // Or at least until we are done with deleting tablets via extsubdomain's hive.
+        // In a configuration with dedicated nodes extsubdomain's hive runs on
+        // extsubdomain's nodes and nodes can't reconnect to the extsubdomain
+        // if its root is not resolvable. And nodes could go away right in the middle of anything --
+        // -- being able to reconnect node any time until extsubdomain is actually gone
+        // is a good thing.
         auto pathes = context.SS->ListSubTree(pathId, context.Ctx);
+        pathes.erase(pathId);
         context.SS->DropPathes(pathes, step, OperationId.GetTxId(), db, context.Ctx);

         auto parentDir = context.SS->PathsById.at(path->ParentPathId);
@@ -197,12 +223,15 @@ public:
         Y_VERIFY(txState);
         Y_VERIFY(txState->TxType == TTxState::TxForceDropExtSubDomain);

-        auto pathes = context.SS->ListSubTree(txState->TargetPathId, context.Ctx);
+        auto targetPath = context.SS->PathsById.at(txState->TargetPathId);
+
+        auto pathes = context.SS->ListSubTree(targetPath->PathId, context.Ctx);
         NForceDrop::ValidateNoTransactionOnPathes(OperationId, pathes, context);
-        context.SS->MarkAsDropping({txState->TargetPathId}, OperationId.GetTxId(), context.Ctx);
         NForceDrop::CollectShards(pathes, OperationId, txState, context);
-        context.OnComplete.ProposeToCoordinator(OperationId, txState->TargetPathId, TStepId(0));
+        context.SS->MarkAsDropping(targetPath, OperationId.GetTxId(), context.Ctx);
+
+        context.OnComplete.ProposeToCoordinator(OperationId, targetPath->PathId, TStepId(0));

         return false;
     }
 };
@@ -328,7 +357,7 @@ public:
             }
         }

-        context.SS->MarkAsDropping({path.Base()->PathId}, OperationId.GetTxId(), context.Ctx);
+        context.SS->MarkAsDropping(path.Base(), OperationId.GetTxId(), context.Ctx);

         txState.State = TTxState::Propose;
         context.OnComplete.ActivateTx(OperationId);
diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp
index c75ca3ecc14..614612a387d 100644
--- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp
@@ -4187,7 +4187,7 @@ void TSchemeShard::StateWork(STFUNC_SIG) {
         HFuncTraced(TEvSchemeShard::TEvSyncTenantSchemeShard, Handle);
         HFuncTraced(TEvSchemeShard::TEvProcessingRequest, Handle);
-
+
         HFuncTraced(TEvSchemeShard::TEvUpdateTenantSchemeShard, Handle);
         HFuncTraced(TSchemeBoardEvents::TEvUpdateAck, Handle);
@@ -4473,16 +4473,6 @@ THashSet<TShardIdx> TSchemeShard::CollectAllShards(const THashSet<TPathId> &path
     return shards;
 }

-void TSchemeShard::MarkAsDroping(TPathElement::TPtr node, TTxId txId, const TActorContext &ctx) {
-    LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
-               "Mark as Dropping path id " << node->PathId <<
-               " by tx: " << txId);
-    if (!node->Dropped()) {
-        node->PathState = TPathElement::EPathState::EPathStateDrop;
-        node->DropTxId = txId;
-    }
-}
-
 void TSchemeShard::UncountNode(TPathElement::TPtr node) {
     const auto isBackupTable = IsBackupTable(node->PathId);

@@ -4550,12 +4540,6 @@ void TSchemeShard::UncountNode(TPathElement::TPtr node) {
     }
 }

-void TSchemeShard::MarkAsDropping(const THashSet<TPathId> &pathes, TTxId txId, const TActorContext &ctx) {
-    for (auto id: pathes) {
-        MarkAsDroping(PathsById.at(id), txId, ctx);
-    }
-}
-
 void TSchemeShard::MarkAsMigrated(TPathElement::TPtr node, const TActorContext &ctx) {
     LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
                "Mark as Migrated path id " << node->PathId);
@@ -4576,6 +4560,21 @@ void TSchemeShard::MarkAsMigrated(TPathElement::TPtr node, const TActorContext &
     UncountNode(node);
 }

+void TSchemeShard::MarkAsDropping(TPathElement::TPtr node, TTxId txId, const TActorContext &ctx) {
+    LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
+               "Mark as Dropping path id " << node->PathId <<
+               " by tx: " << txId);
+    if (!node->Dropped()) {
+        node->PathState = TPathElement::EPathState::EPathStateDrop;
+        node->DropTxId = txId;
+    }
+}
+
+void TSchemeShard::MarkAsDropping(const THashSet<TPathId> &pathes, TTxId txId, const TActorContext &ctx) {
+    for (auto id: pathes) {
+        MarkAsDropping(PathsById.at(id), txId, ctx);
+    }
+}

 void TSchemeShard::DropNode(TPathElement::TPtr node, TStepId step, TTxId txId, NIceDb::TNiceDb &db, const TActorContext &ctx) {
     Y_VERIFY_S(node->PathState == TPathElement::EPathState::EPathStateDrop
diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h
index 550e7159eac..54953898eeb 100644
--- a/ydb/core/tx/schemeshard/schemeshard_impl.h
+++ b/ydb/core/tx/schemeshard/schemeshard_impl.h
@@ -533,7 +533,7 @@ public:
     THashSet<TPathId> ListSubTree(TPathId subdomain_root, const TActorContext& ctx);
     THashSet<TTxId> GetRelatedTransactions(const THashSet<TPathId>& pathes, const TActorContext &ctx);

-    void MarkAsDroping(TPathElement::TPtr node, TTxId txId, const TActorContext& ctx);
+    void MarkAsDropping(TPathElement::TPtr node, TTxId txId, const TActorContext& ctx);
     void MarkAsDropping(const THashSet<TPathId>& pathes, TTxId txId, const TActorContext& ctx);

     void UncountNode(TPathElement::TPtr node);
@@ -960,7 +960,7 @@ public:
     void Handle(TEvDataShard::TEvCompactTableResult::TPtr &ev, const TActorContext &ctx);
     void Handle(TEvDataShard::TEvCompactBorrowedResult::TPtr &ev, const TActorContext &ctx);
-
+
     void Handle(TEvSchemeShard::TEvProcessingRequest::TPtr& ev, const TActorContext& ctx);

     void Handle(TEvSchemeShard::TEvSyncTenantSchemeShard::TPtr& ev, const TActorContext& ctx);
     void Handle(TEvSchemeShard::TEvUpdateTenantSchemeShard::TPtr& ev, const TActorContext& ctx);
diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp
index 4b3be0d21de..7397b6f013d 100644
--- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp
@@ -782,9 +782,9 @@ THolder<TEvSchemeShard::TEvDescribeSchemeResultBuilder> TPathDescriber::Describe
             checks
                 .NotDeleted();

-            if (checks && !path.Base()->IsTable() && !path.Base()->IsTableIndex() && !path.Base()->IsDirectory()) {
+            if (checks && !path.Base()->IsTable() && !path.Base()->IsTableIndex() && !path.Base()->IsDirectory() && !path.Base()->IsDomainRoot()) {
                 // KIKIMR-13173
-                // PQ BSV drop their shard before PlatStep
+                // PQ BSV drop their shard before PlanStep
                 // If they are being deleted consider them as deleted
                 checks.NotUnderDeleting(NKikimrScheme::StatusPathDoesNotExist);
             }
diff --git a/ydb/core/tx/schemeshard/ut_extsubdomain_reboots.cpp b/ydb/core/tx/schemeshard/ut_extsubdomain_reboots.cpp
index c579cef26f1..e780c7d7145 100644
--- a/ydb/core/tx/schemeshard/ut_extsubdomain_reboots.cpp
+++ b/ydb/core/tx/schemeshard/ut_extsubdomain_reboots.cpp
@@ -191,7 +191,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTestExtSubdomainReboots) {
             TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"),
                                {NLs::PathNotExist});
             TestDescribeResult(DescribePath(runtime, "/MyRoot"),
-                               {NLs::ChildrenCount(1)});
+                               {NLs::ChildrenCount(1, NKikimrSchemeOp::EPathState::EPathStateNoChanges)});

             UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "SubDomains", "PathId", 3));
             UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "Paths", "Id", 3));
         }
@@ -235,7 +235,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTestExtSubdomainReboots) {
             TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"),
                                {NLs::PathNotExist});
             TestDescribeResult(DescribePath(runtime, "/MyRoot"),
-                               {NLs::ChildrenCount(1)});
+                               {NLs::ChildrenCount(1, NKikimrSchemeOp::EPathState::EPathStateNoChanges)});

             t.TestEnv->TestWaitShardDeletion(runtime, {1, 2, 3, 4, 5, 6});
             UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "SubDomains", "PathId", 3));
             UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "Paths", "Id", 3));
diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
index de8fd06c4bc..2b00a872c9e 100644
--- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
+++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
@@ -674,6 +674,19 @@ TCheckFunc ChildrenCount(ui32 count) {
     };
 }

+TCheckFunc ChildrenCount(ui32 count, NKikimrSchemeOp::EPathState pathState) {
+    return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
+        auto actual = std::count_if(
+            record.GetPathDescription().GetChildren().begin(),
+            record.GetPathDescription().GetChildren().end(),
+            [pathState] (auto item) {
+                return item.GetPathState() == pathState;
+            }
+        );
+        UNIT_ASSERT_VALUES_EQUAL(actual, count);
+    };
+}
+
 TCheckFunc IndexesCount(ui32 count) {
     return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
         UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTable().TableIndexesSize(), count);
diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h
index c183a8042ec..a4ca194b3a8 100644
--- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h
+++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h
@@ -74,6 +74,7 @@ namespace NLs {
     TCheckFunc PathStringEqual(const TString& path);
     void NoChildren(const NKikimrScheme::TEvDescribeSchemeResult& record);
     TCheckFunc ChildrenCount(ui32 count);
+    TCheckFunc ChildrenCount(ui32 count, NKikimrSchemeOp::EPathState pathState);
     void PathNotExist(const NKikimrScheme::TEvDescribeSchemeResult& record);
     void PathExist(const NKikimrScheme::TEvDescribeSchemeResult& record);
     void PathRedirected(const NKikimrScheme::TEvDescribeSchemeResult& record);
diff --git a/ydb/tests/functional/tenants/test_dynamic_tenants.py b/ydb/tests/functional/tenants/test_dynamic_tenants.py
index b9d87618a54..cf5827ae393 100644
--- a/ydb/tests/functional/tenants/test_dynamic_tenants.py
+++ b/ydb/tests/functional/tenants/test_dynamic_tenants.py
@@ -84,6 +84,33 @@ def test_create_tenant_with_cpu(ydb_cluster):
     ydb_cluster.unregister_and_stop_slots(database_nodes)

+
+def test_drop_tenant_without_nodes_could_continue(ydb_cluster):
+    database = '/Root/users/database'
+    ydb_cluster.create_database(
+        database,
+        storage_pool_units_count={
+            'hdd': 1
+        }
+    )
+    database_nodes = ydb_cluster.register_and_start_slots(database, count=1)
+    ydb_cluster.wait_tenant_up(database)
+    time.sleep(1)
+
+    logger.debug("stop database nodes")
+    ydb_cluster.unregister_and_stop_slots(database_nodes)
+
+    logger.debug("remove database")
+    operation_id = ydb_cluster._remove_database_send_op(database)
+
+    logger.debug("restart database nodes")
+    database_nodes = ydb_cluster.register_and_start_slots(database, count=1)
+
+    ydb_cluster._remove_database_wait_op(database, operation_id)
+    ydb_cluster._remove_database_wait_tenant_gone(database)
+
+    ydb_cluster.unregister_and_stop_slots(database_nodes)
+
+
 def test_create_tenant_then_exec_yql_empty_database_header(ydb_cluster, ydb_endpoint):
     database = '/Root/users/database'

@@ -180,7 +207,7 @@ def test_create_tenant_then_exec_yql(ydb_cluster):
     ydb_cluster.unregister_and_stop_slots(database_nodes)


-def test_test_create_and_drop_tenants(ydb_cluster, robust_retries):
+def test_create_and_drop_tenants(ydb_cluster, robust_retries):
     for iNo in range(10):
         database = '/Root/users/database_%d' % iNo
diff --git a/ydb/tests/library/harness/kikimr_cluster_interface.py b/ydb/tests/library/harness/kikimr_cluster_interface.py
index 7717281d3ec..1caf513af9a 100644
--- a/ydb/tests/library/harness/kikimr_cluster_interface.py
+++ b/ydb/tests/library/harness/kikimr_cluster_interface.py
@@ -286,27 +286,41 @@ class KiKiMRClusterInterface(object):
     ):
         logger.debug(database_name)

-        req = RemoveTenantRequest(database_name)
+        operation_id = self._remove_database_send_op(database_name)
+        self._remove_database_wait_op(database_name, operation_id, timeout_seconds=timeout_seconds)
+        self._remove_database_wait_tenant_gone(database_name, timeout_seconds=timeout_seconds)
+
+        return database_name
+
+    def _remove_database_send_op(self, database_name):
+        logger.debug('%s: send console operation', database_name)
+
+        req = RemoveTenantRequest(database_name)
         response = self.client.send_request(req.protobuf, method='ConsoleRequest')
         operation = response.RemoveTenantResponse.Response.operation
-        logger.debug('response from console: %s', response)
+        logger.debug('%s: response from console: %s', database_name, response)
+
         if not operation.ready and response.Status.Code != StatusIds.STATUS_CODE_UNSPECIFIED:
             raise RuntimeError('remove_database failed: %s: %s' % (response.Status.Code, response.Status.Reason))
-        if not operation.ready:
-            logger.debug('waiting for operation done')
-            operation = self.__wait_console_op(operation.id, timeout_seconds=timeout_seconds)
-        logger.debug('operation done')
+
+        return operation.id
+
+    def _remove_database_wait_op(self, database_name, operation_id, timeout_seconds=20):
+        logger.debug('%s: wait console operation done', database_name)
+        operation = self.__wait_console_op(operation_id, timeout_seconds=timeout_seconds)
+        logger.debug('%s: console operation done', database_name)
+
         if operation.status not in (StatusIds.SUCCESS, StatusIds.NOT_FOUND):
             raise RuntimeError('remove_database failed: %s' % (operation.status,))

+    def _remove_database_wait_tenant_gone(self, database_name, timeout_seconds=20):
+        logger.debug('%s: wait tenant gone', database_name)
+
         def predicate():
             response = self.client.send_request(
                 GetTenantStatusRequest(database_name).protobuf,
                 method='ConsoleRequest').GetTenantStatusResponse
             return response.Response.operation.status == StatusIds.NOT_FOUND

-        logger.debug('waiting tenant gone')
-
         tenant_not_found = wait_for(
             predicate=predicate,
             timeout_seconds=timeout_seconds,
@@ -314,7 +328,7 @@ class KiKiMRClusterInterface(object):
         )
         assert tenant_not_found
-        logger.debug('tenant gone')
+        logger.debug('%s: tenant gone', database_name)

         return database_name