diff options
author | alexvru <alexvru@ydb.tech> | 2023-08-29 08:09:13 +0300 |
---|---|---|
committer | alexvru <alexvru@ydb.tech> | 2023-08-29 08:43:06 +0300 |
commit | 5ed85ba839150bd1a4580f0b989859a899559535 (patch) | |
tree | 313971349ce68b3bb36615c470ba46aba2a98ae3 | |
parent | 700daca8b71c578d994301fde50b8c16ce5605a5 (diff) | |
download | ydb-5ed85ba839150bd1a4580f0b989859a899559535.tar.gz |
Fix deadlock in get_blob monitoring page KIKIMR-19185
-rw-r--r-- | ydb/core/blobstorage/other/mon_get_blob_page.cpp | 192 | ||||
-rw-r--r-- | ydb/core/blobstorage/other/mon_get_blob_page.h | 6 | ||||
-rw-r--r-- | ydb/core/driver_lib/run/run.cpp | 11 |
3 files changed, 94 insertions, 115 deletions
diff --git a/ydb/core/blobstorage/other/mon_get_blob_page.cpp b/ydb/core/blobstorage/other/mon_get_blob_page.cpp index 8814ebd17f..78cadf2911 100644 --- a/ydb/core/blobstorage/other/mon_get_blob_page.cpp +++ b/ydb/core/blobstorage/other/mon_get_blob_page.cpp @@ -7,93 +7,32 @@ namespace NKikimr { namespace { - class TMonGetBlobPage - : public NMonitoring::IMonPage + class TMonGetBlobActor + : public TActor<TMonGetBlobActor> { - struct TRequestResult { - TLogoBlobID LogoBlobId; - NKikimrProto::EReplyStatus Status; - TString Buffer; - TString DebugInfo; - TVector<TEvBlobStorage::TEvGetResult::TPartMapItem> PartMap; - }; - - class TRequestActor - : public TActorBootstrapped<TRequestActor> - { - const ui32 GroupId; - const TLogoBlobID LogoBlobId; - const bool CollectDebugInfo; - NThreading::TPromise<TRequestResult> Promise; - - public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::BS_GET_ACTOR; - } - - TRequestActor(ui32 groupId, const TLogoBlobID& logoBlobId, bool collectDebugInfo, - NThreading::TPromise<TRequestResult> promise) - : GroupId(groupId) - , LogoBlobId(logoBlobId) - , CollectDebugInfo(collectDebugInfo) - , Promise(promise) - {} - - void Bootstrap(const TActorContext& ctx) { - auto query = std::make_unique<TEvBlobStorage::TEvGet>(LogoBlobId, 0, 0, TInstant::Max(), - NKikimrBlobStorage::AsyncRead); - query->CollectDebugInfo = CollectDebugInfo; - query->ReportDetailedPartMap = true; - SendToBSProxy(ctx, GroupId, query.release()); - Become(&TRequestActor::StateFunc); - } - - void Handle(TEvBlobStorage::TEvGetResult::TPtr& ev, const TActorContext& ctx) { - TEvBlobStorage::TEvGetResult *msg = ev->Get(); - - TRequestResult result; - if (msg->Status != NKikimrProto::OK) { - result.Status = msg->Status; - } else if (msg->ResponseSz != 1) { - result.Status = NKikimrProto::ERROR; - } else { - result.LogoBlobId = msg->Responses[0].Id; - result.Status = msg->Responses[0].Status; - result.Buffer = msg->Responses[0].Buffer.ConvertToString(); - result.PartMap = std::move(msg->Responses[0].PartMap); - } - result.DebugInfo = std::move(msg->DebugInfo); - - Promise.SetValue(result); - Die(ctx); - } - - STRICT_STFUNC(StateFunc, - HFunc(TEvBlobStorage::TEvGetResult, Handle) - ) - }; - - private: - TActorSystem *ActorSystem; + ui64 LastCookie = 0; + THashMap<ui64, std::tuple<TActorId, ui64, int, ui32, bool, bool>> RequestsInFlight; public: - TMonGetBlobPage(const TString& path, TActorSystem *actorSystem) - : IMonPage(path) - , ActorSystem(actorSystem) + TMonGetBlobActor() + : TActor(&TThis::StateFunc) {} - void Output(NMonitoring::IMonHttpRequest& request) override { - IOutputStream& out = request.Output(); - + void Handle(NMon::TEvHttpInfo::TPtr ev) { // parse HTTP request - const TCgiParameters& params = request.GetParams(); + const TCgiParameters& params = ev->Get()->Request.GetParams(); auto generateError = [&](const TString& msg) { + TStringStream out; + out << "HTTP/1.1 400 Bad Request\r\n" << "Content-Type: text/plain\r\n" << "Connection: close\r\n" << "\r\n" << msg << "\r\n"; + + Send(ev->Sender, new NMon::TEvHttpInfoRes(out.Str(), ev->Get()->SubRequestId, NMon::TEvHttpInfoRes::Custom), 0, + ev->Cookie); }; ui32 groupId = 0; @@ -139,30 +78,48 @@ namespace { } } - // create promise & future to obtain query result - auto promise = NThreading::NewPromise<TRequestResult>(); - auto future = promise.GetFuture(); + const ui64 cookie = ++LastCookie; + auto query = std::make_unique<TEvBlobStorage::TEvGet>(logoBlobId, 0, 0, TInstant::Max(), + NKikimrBlobStorage::AsyncRead); + query->CollectDebugInfo = collectDebugInfo; + query->ReportDetailedPartMap = true; + SendToBSProxy(SelfId(), groupId, query.release(), cookie); + RequestsInFlight[cookie] = {ev->Sender, ev->Cookie, ev->Get()->SubRequestId, groupId, binary, collectDebugInfo}; + } - // register and start actor - ActorSystem->Register(new TRequestActor(groupId, logoBlobId, collectDebugInfo, promise)); + void Handle(TEvBlobStorage::TEvGetResult::TPtr ev) { + const auto it = RequestsInFlight.find(ev->Cookie); + Y_VERIFY(it != RequestsInFlight.end()); + const auto& [sender, senderCookie, subRequestId, groupId, binary, collectDebugInfo] = it->second; - // wait for query to complete - future.Wait(); + TEvBlobStorage::TEvGetResult& msg = *ev->Get(); + if (msg.Status == NKikimrProto::OK && msg.ResponseSz != 1) { + msg.Status = NKikimrProto::ERROR; + } + + NKikimrProto::EReplyStatus status = msg.Status; + TLogoBlobID id; + TString buffer; - // obtain result - const TRequestResult& result = future.GetValue(); + if (msg.ResponseSz == 1) { + status = msg.Responses[0].Status; + id = msg.Responses[0].Id; + buffer = msg.Responses[0].Buffer.ConvertToString(); + } + + TStringStream out; if (binary) { // generate data stream depending on result status - if (result.Status == NKikimrProto::OK) { + if (status == NKikimrProto::OK) { out << "HTTP/1.1 200 OK\r\n" << "Content-Type: application/octet-stream\r\n" - << "Content-Disposition: attachment; filename=\"" << result.LogoBlobId.ToString() << "\"\r\n" + << "Content-Disposition: attachment; filename=\"" << id.ToString() << "\"\r\n" << "Connection: close\r\n" << "\r\n"; - out.Write(result.Buffer); + out.Write(buffer); } else { - if (result.Status == NKikimrProto::NODATA) { + if (status == NKikimrProto::NODATA) { out << "HTTP/1.1 204 No Content\r\n"; } else { out << "HTTP/1.1 500 Error\r\n"; @@ -172,7 +129,7 @@ namespace { << "\r\n"; out << "{\n" - << " \"Status\": \"" << NKikimrProto::EReplyStatus_Name(result.Status) << "\"\n" + << " \"Status\": \"" << NKikimrProto::EReplyStatus_Name(status) << "\"\n" << "}\n"; } } else { @@ -204,14 +161,14 @@ namespace { DIV() { out << "LogoBlobId: "; STRONG() { - out << logoBlobId.ToString(); + out << id.ToString(); } } DIV() { out << "Status: "; STRONG() { - out << NKikimrProto::EReplyStatus_Name(result.Status); + out << NKikimrProto::EReplyStatus_Name(status); } } @@ -220,7 +177,7 @@ namespace { out << "Debug Info:"; DIV() { out << "<pre><small>"; - out << result.DebugInfo; + out << msg.DebugInfo; out << "</small></pre>"; } } @@ -241,27 +198,29 @@ namespace { } } TABLEBODY() { - for (const auto& item : result.PartMap) { - auto prefix = [&] { - TABLED() { out << item.DiskOrderNumber; } - TABLED() { out << item.PartIdRequested; } - TABLED() { out << item.RequestIndex; } - TABLED() { out << item.ResponseIndex; } - }; - if (item.Status) { - for (const auto& x : item.Status) { + if (msg.ResponseSz == 1) { + for (const auto& item : msg.Responses[0].PartMap) { + auto prefix = [&] { + TABLED() { out << item.DiskOrderNumber; } + TABLED() { out << item.PartIdRequested; } + TABLED() { out << item.RequestIndex; } + TABLED() { out << item.ResponseIndex; } + }; + if (item.Status) { + for (const auto& x : item.Status) { + TABLER() { + prefix(); + TABLED() { out << x.first; } + TABLED() { out << NKikimrProto::EReplyStatus_Name(x.second); } + } + } + } else { TABLER() { prefix(); - TABLED() { out << x.first; } - TABLED() { out << NKikimrProto::EReplyStatus_Name(x.second); } + TABLED() { out << "-"; } + TABLED() { out << "-"; } } } - } else { - TABLER() { - prefix(); - TABLED() { out << "-"; } - TABLED() { out << "-"; } - } } } } @@ -269,16 +228,15 @@ namespace { } } - if (result.Status == NKikimrProto::OK) { + if (status == NKikimrProto::OK) { DIV() { TCgiParameters params; - params.InsertEscaped("blob", logoBlobId.ToString()); + params.InsertEscaped("blob", id.ToString()); params.InsertEscaped("groupId", ToString(groupId)); params.InsertEscaped("binary", "1"); out << "<a href=\"?" << params() << "\">Data</a>"; DIV() { out << "<pre><small>"; - const TString& buffer = result.Buffer; const size_t rowSize = 64; for (size_t offset = 0; offset < buffer.size(); offset += rowSize) { out << Sprintf("0x%06zx | ", offset); @@ -319,13 +277,21 @@ namespace { } } } + + Send(sender, new NMon::TEvHttpInfoRes(out.Str(), subRequestId, NMon::TEvHttpInfoRes::Custom), 0, senderCookie); + RequestsInFlight.erase(it); } + + STRICT_STFUNC(StateFunc, + hFunc(NMon::TEvHttpInfo, Handle); + hFunc(TEvBlobStorage::TEvGetResult, Handle); + ) }; } // anon -NMonitoring::IMonPage *CreateMonGetBlobPage(const TString& path, TActorSystem *actorSystem) { - return new TMonGetBlobPage(path, actorSystem); +IActor *CreateMonGetBlobActor() { + return new TMonGetBlobActor; } } // NKikimr diff --git a/ydb/core/blobstorage/other/mon_get_blob_page.h b/ydb/core/blobstorage/other/mon_get_blob_page.h index d822023ab7..3c5ca87234 100644 --- a/ydb/core/blobstorage/other/mon_get_blob_page.h +++ b/ydb/core/blobstorage/other/mon_get_blob_page.h @@ -6,6 +6,10 @@ namespace NKikimr { - NMonitoring::IMonPage *CreateMonGetBlobPage(const TString& path, TActorSystem *actorSystem); + inline TActorId MakeMonGetBlobId() { + return TActorId(0, TStringBuf("get_blob_mon", 12)); + } + + IActor *CreateMonGetBlobActor(); } // NKikimr diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index c088c6a6b9..b8b99c8ecc 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -1219,6 +1219,8 @@ void TKikimrRunner::InitializeActorSystem( TMailboxType::HTSwap, AppData->SystemPoolId)); setup->LocalServices.emplace_back(MakeMonVDiskStreamId(), TActorSetupCmd(CreateMonVDiskStreamActor(), TMailboxType::HTSwap, AppData->SystemPoolId)); + setup->LocalServices.emplace_back(MakeMonGetBlobId(), TActorSetupCmd(CreateMonGetBlobActor(), + TMailboxType::HTSwap, AppData->SystemPoolId)); } ApplyLogSettings(runConfig); @@ -1266,7 +1268,14 @@ void TKikimrRunner::InitializeActorSystem( MakeBlobStorageFailureInjectionID(runConfig.NodeId)); } - Monitoring->Register(CreateMonGetBlobPage("get_blob", ActorSystem.Get())); + Monitoring->RegisterActorPage( + nullptr, + "get_blob", + TString(), + false, + ActorSystem.Get(), + MakeMonGetBlobId()); + Monitoring->Register(CreateMonBlobRangePage("blob_range", ActorSystem.Get())); Monitoring->RegisterActorPage( |