about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Nikita Vasilev <ns-vasilev@ydb.tech> 2025-04-25 16:57:12 +0300
committer GitHub <noreply@github.com> 2025-04-25 16:57:12 +0300
commit 1befd32be377b13e80c40527163e9b9d0d47f973 (patch)
tree 443e10f4ae265d2ab73078869521481541b9377a
parent 48459a580b146b6ef7ceaff1a1360b17862d9ce8 (diff)
download ydb-1befd32be377b13e80c40527163e9b9d0d47f973.tar.gz
Fix UNAVAILABLE while having delivery problem from finished shard (#17723)
-rw-r--r-- ydb/core/kqp/runtime/kqp_write_actor.cpp 49
1 file changed, 26 insertions, 23 deletions
diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp
index 00e93fb7aa8..2b23a2aab57 100644
--- a/ydb/core/kqp/runtime/kqp_write_actor.cpp
+++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp
@@ -1033,10 +1033,11 @@ public:
void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) {
CA_LOG_W("TEvDeliveryProblem was received from tablet: " << ev->Get()->TabletId);
+ const auto state = TxManager->GetState(ev->Get()->TabletId);
if (InconsistentTx) {
RetryShard(ev->Get()->TabletId, std::nullopt);
- } else if ((TxManager->GetState(ev->Get()->TabletId) == IKqpTransactionManager::PREPARED
- || TxManager->GetState(ev->Get()->TabletId) == IKqpTransactionManager::EXECUTING)
+ } else if ((state == IKqpTransactionManager::PREPARED
+ || state == IKqpTransactionManager::EXECUTING)
&& TxManager->ShouldReattach(ev->Get()->TabletId, TlsActivationContext->Now())) {
// Disconnected while waiting for other shards to prepare
auto& reattachState = TxManager->GetReattachState(ev->Get()->TabletId);
@@ -1044,28 +1045,30 @@ public:
<< reattachState.ReattachInfo.Delay << ")");
Schedule(reattachState.ReattachInfo.Delay, new TEvPrivate::TEvReattachToShard(ev->Get()->TabletId));
+ } else if (state == IKqpTransactionManager::EXECUTING) {
+ TxManager->SetError(ev->Get()->TabletId);
+ RuntimeError(
+ NYql::NDqProto::StatusIds::UNDETERMINED,
+ NYql::TIssuesIds::KIKIMR_OPERATION_STATE_UNKNOWN,
+ TStringBuilder()
+ << "State of operation is unknown. "
+ << "Error writing to table `" << TablePath << "`"
+ << ". Transaction state unknown for tablet " << ev->Get()->TabletId << ".");
+ return;
+ } else if (state == IKqpTransactionManager::PROCESSING
+ || state == IKqpTransactionManager::PREPARING
+ || state == IKqpTransactionManager::PREPARED) {
+ TxManager->SetError(ev->Get()->TabletId);
+ RuntimeError(
+ NYql::NDqProto::StatusIds::UNAVAILABLE,
+ NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE,
+ TStringBuilder()
+ << "Kikimr cluster or one of its subsystems was unavailable. "
+ << "Error writing to table `" << TablePath << "`"
+ << ": can't deliver message to tablet " << ev->Get()->TabletId << ".");
+ return;
} else {
- if (TxManager->GetState(ev->Get()->TabletId) == IKqpTransactionManager::EXECUTING) {
- TxManager->SetError(ev->Get()->TabletId);
- RuntimeError(
- NYql::NDqProto::StatusIds::UNDETERMINED,
- NYql::TIssuesIds::KIKIMR_OPERATION_STATE_UNKNOWN,
- TStringBuilder()
- << "State of operation is unknown. "
- << "Error writing to table `" << TablePath << "`"
- << ". Transaction state unknown for tablet " << ev->Get()->TabletId << ".");
- return;
- } else {
- TxManager->SetError(ev->Get()->TabletId);
- RuntimeError(
- NYql::NDqProto::StatusIds::UNAVAILABLE,
- NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE,
- TStringBuilder()
- << "Kikimr cluster or one of its subsystems was unavailable. "
- << "Error writing to table `" << TablePath << "`"
- << ": can't deliver message to tablet " << ev->Get()->TabletId << ".");
- return;
- }
+ AFL_ENSURE(state == IKqpTransactionManager::FINISHED || state == IKqpTransactionManager::ERROR);
}
}