diff options
author | Nikita Vasilev <ns-vasilev@ydb.tech> | 2025-04-25 16:57:12 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-25 16:57:12 +0300 |
commit | 1befd32be377b13e80c40527163e9b9d0d47f973 (patch) | |
tree | 443e10f4ae265d2ab73078869521481541b9377a | |
parent | 48459a580b146b6ef7ceaff1a1360b17862d9ce8 (diff) | |
download | ydb-1befd32be377b13e80c40527163e9b9d0d47f973.tar.gz |
Fix UNAVAILABLE while having delivery problem from finished shard (#17723)
-rw-r--r-- | ydb/core/kqp/runtime/kqp_write_actor.cpp | 49 |
1 files changed, 26 insertions, 23 deletions
diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 00e93fb7aa8..2b23a2aab57 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1033,10 +1033,11 @@ public: void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { CA_LOG_W("TEvDeliveryProblem was received from tablet: " << ev->Get()->TabletId); + const auto state = TxManager->GetState(ev->Get()->TabletId); if (InconsistentTx) { RetryShard(ev->Get()->TabletId, std::nullopt); - } else if ((TxManager->GetState(ev->Get()->TabletId) == IKqpTransactionManager::PREPARED - || TxManager->GetState(ev->Get()->TabletId) == IKqpTransactionManager::EXECUTING) + } else if ((state == IKqpTransactionManager::PREPARED + || state == IKqpTransactionManager::EXECUTING) && TxManager->ShouldReattach(ev->Get()->TabletId, TlsActivationContext->Now())) { // Disconnected while waiting for other shards to prepare auto& reattachState = TxManager->GetReattachState(ev->Get()->TabletId); @@ -1044,28 +1045,30 @@ public: << reattachState.ReattachInfo.Delay << ")"); Schedule(reattachState.ReattachInfo.Delay, new TEvPrivate::TEvReattachToShard(ev->Get()->TabletId)); + } else if (state == IKqpTransactionManager::EXECUTING) { + TxManager->SetError(ev->Get()->TabletId); + RuntimeError( + NYql::NDqProto::StatusIds::UNDETERMINED, + NYql::TIssuesIds::KIKIMR_OPERATION_STATE_UNKNOWN, + TStringBuilder() + << "State of operation is unknown. " + << "Error writing to table `" << TablePath << "`" + << ". Transaction state unknown for tablet " << ev->Get()->TabletId << "."); + return; + } else if (state == IKqpTransactionManager::PROCESSING + || state == IKqpTransactionManager::PREPARING + || state == IKqpTransactionManager::PREPARED) { + TxManager->SetError(ev->Get()->TabletId); + RuntimeError( + NYql::NDqProto::StatusIds::UNAVAILABLE, + NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, + TStringBuilder() + << "Kikimr cluster or one of its subsystems was unavailable. " + << "Error writing to table `" << TablePath << "`" + << ": can't deliver message to tablet " << ev->Get()->TabletId << "."); + return; } else { - if (TxManager->GetState(ev->Get()->TabletId) == IKqpTransactionManager::EXECUTING) { - TxManager->SetError(ev->Get()->TabletId); - RuntimeError( - NYql::NDqProto::StatusIds::UNDETERMINED, - NYql::TIssuesIds::KIKIMR_OPERATION_STATE_UNKNOWN, - TStringBuilder() - << "State of operation is unknown. " - << "Error writing to table `" << TablePath << "`" - << ". Transaction state unknown for tablet " << ev->Get()->TabletId << "."); - return; - } else { - TxManager->SetError(ev->Get()->TabletId); - RuntimeError( - NYql::NDqProto::StatusIds::UNAVAILABLE, - NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, - TStringBuilder() - << "Kikimr cluster or one of its subsystems was unavailable. " - << "Error writing to table `" << TablePath << "`" - << ": can't deliver message to tablet " << ev->Get()->TabletId << "."); - return; - } + AFL_ENSURE(state == IKqpTransactionManager::FINISHED || state == IKqpTransactionManager::ERROR); } } |