aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoralexvru <alexvru@ydb.tech>2023-08-29 08:09:13 +0300
committeralexvru <alexvru@ydb.tech>2023-08-29 08:43:06 +0300
commit5ed85ba839150bd1a4580f0b989859a899559535 (patch)
tree313971349ce68b3bb36615c470ba46aba2a98ae3
parent700daca8b71c578d994301fde50b8c16ce5605a5 (diff)
downloadydb-5ed85ba839150bd1a4580f0b989859a899559535.tar.gz
Fix deadlock in get_blob monitoring page KIKIMR-19185
-rw-r--r--ydb/core/blobstorage/other/mon_get_blob_page.cpp192
-rw-r--r--ydb/core/blobstorage/other/mon_get_blob_page.h6
-rw-r--r--ydb/core/driver_lib/run/run.cpp11
3 files changed, 94 insertions, 115 deletions
diff --git a/ydb/core/blobstorage/other/mon_get_blob_page.cpp b/ydb/core/blobstorage/other/mon_get_blob_page.cpp
index 8814ebd17f..78cadf2911 100644
--- a/ydb/core/blobstorage/other/mon_get_blob_page.cpp
+++ b/ydb/core/blobstorage/other/mon_get_blob_page.cpp
@@ -7,93 +7,32 @@ namespace NKikimr {
namespace {
- class TMonGetBlobPage
- : public NMonitoring::IMonPage
+ class TMonGetBlobActor
+ : public TActor<TMonGetBlobActor>
{
- struct TRequestResult {
- TLogoBlobID LogoBlobId;
- NKikimrProto::EReplyStatus Status;
- TString Buffer;
- TString DebugInfo;
- TVector<TEvBlobStorage::TEvGetResult::TPartMapItem> PartMap;
- };
-
- class TRequestActor
- : public TActorBootstrapped<TRequestActor>
- {
- const ui32 GroupId;
- const TLogoBlobID LogoBlobId;
- const bool CollectDebugInfo;
- NThreading::TPromise<TRequestResult> Promise;
-
- public:
- static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
- return NKikimrServices::TActivity::BS_GET_ACTOR;
- }
-
- TRequestActor(ui32 groupId, const TLogoBlobID& logoBlobId, bool collectDebugInfo,
- NThreading::TPromise<TRequestResult> promise)
- : GroupId(groupId)
- , LogoBlobId(logoBlobId)
- , CollectDebugInfo(collectDebugInfo)
- , Promise(promise)
- {}
-
- void Bootstrap(const TActorContext& ctx) {
- auto query = std::make_unique<TEvBlobStorage::TEvGet>(LogoBlobId, 0, 0, TInstant::Max(),
- NKikimrBlobStorage::AsyncRead);
- query->CollectDebugInfo = CollectDebugInfo;
- query->ReportDetailedPartMap = true;
- SendToBSProxy(ctx, GroupId, query.release());
- Become(&TRequestActor::StateFunc);
- }
-
- void Handle(TEvBlobStorage::TEvGetResult::TPtr& ev, const TActorContext& ctx) {
- TEvBlobStorage::TEvGetResult *msg = ev->Get();
-
- TRequestResult result;
- if (msg->Status != NKikimrProto::OK) {
- result.Status = msg->Status;
- } else if (msg->ResponseSz != 1) {
- result.Status = NKikimrProto::ERROR;
- } else {
- result.LogoBlobId = msg->Responses[0].Id;
- result.Status = msg->Responses[0].Status;
- result.Buffer = msg->Responses[0].Buffer.ConvertToString();
- result.PartMap = std::move(msg->Responses[0].PartMap);
- }
- result.DebugInfo = std::move(msg->DebugInfo);
-
- Promise.SetValue(result);
- Die(ctx);
- }
-
- STRICT_STFUNC(StateFunc,
- HFunc(TEvBlobStorage::TEvGetResult, Handle)
- )
- };
-
- private:
- TActorSystem *ActorSystem;
+ ui64 LastCookie = 0;
+ THashMap<ui64, std::tuple<TActorId, ui64, int, ui32, bool, bool>> RequestsInFlight;
public:
- TMonGetBlobPage(const TString& path, TActorSystem *actorSystem)
- : IMonPage(path)
- , ActorSystem(actorSystem)
+ TMonGetBlobActor()
+ : TActor(&TThis::StateFunc)
{}
- void Output(NMonitoring::IMonHttpRequest& request) override {
- IOutputStream& out = request.Output();
-
+ void Handle(NMon::TEvHttpInfo::TPtr ev) {
// parse HTTP request
- const TCgiParameters& params = request.GetParams();
+ const TCgiParameters& params = ev->Get()->Request.GetParams();
auto generateError = [&](const TString& msg) {
+ TStringStream out;
+
out << "HTTP/1.1 400 Bad Request\r\n"
<< "Content-Type: text/plain\r\n"
<< "Connection: close\r\n"
<< "\r\n"
<< msg << "\r\n";
+
+ Send(ev->Sender, new NMon::TEvHttpInfoRes(out.Str(), ev->Get()->SubRequestId, NMon::TEvHttpInfoRes::Custom), 0,
+ ev->Cookie);
};
ui32 groupId = 0;
@@ -139,30 +78,48 @@ namespace {
}
}
- // create promise & future to obtain query result
- auto promise = NThreading::NewPromise<TRequestResult>();
- auto future = promise.GetFuture();
+ const ui64 cookie = ++LastCookie;
+ auto query = std::make_unique<TEvBlobStorage::TEvGet>(logoBlobId, 0, 0, TInstant::Max(),
+ NKikimrBlobStorage::AsyncRead);
+ query->CollectDebugInfo = collectDebugInfo;
+ query->ReportDetailedPartMap = true;
+ SendToBSProxy(SelfId(), groupId, query.release(), cookie);
+ RequestsInFlight[cookie] = {ev->Sender, ev->Cookie, ev->Get()->SubRequestId, groupId, binary, collectDebugInfo};
+ }
- // register and start actor
- ActorSystem->Register(new TRequestActor(groupId, logoBlobId, collectDebugInfo, promise));
+ void Handle(TEvBlobStorage::TEvGetResult::TPtr ev) {
+ const auto it = RequestsInFlight.find(ev->Cookie);
+ Y_VERIFY(it != RequestsInFlight.end());
+ const auto& [sender, senderCookie, subRequestId, groupId, binary, collectDebugInfo] = it->second;
- // wait for query to complete
- future.Wait();
+ TEvBlobStorage::TEvGetResult& msg = *ev->Get();
+ if (msg.Status == NKikimrProto::OK && msg.ResponseSz != 1) {
+ msg.Status = NKikimrProto::ERROR;
+ }
+
+ NKikimrProto::EReplyStatus status = msg.Status;
+ TLogoBlobID id;
+ TString buffer;
- // obtain result
- const TRequestResult& result = future.GetValue();
+ if (msg.ResponseSz == 1) {
+ status = msg.Responses[0].Status;
+ id = msg.Responses[0].Id;
+ buffer = msg.Responses[0].Buffer.ConvertToString();
+ }
+
+ TStringStream out;
if (binary) {
// generate data stream depending on result status
- if (result.Status == NKikimrProto::OK) {
+ if (status == NKikimrProto::OK) {
out << "HTTP/1.1 200 OK\r\n"
<< "Content-Type: application/octet-stream\r\n"
- << "Content-Disposition: attachment; filename=\"" << result.LogoBlobId.ToString() << "\"\r\n"
+ << "Content-Disposition: attachment; filename=\"" << id.ToString() << "\"\r\n"
<< "Connection: close\r\n"
<< "\r\n";
- out.Write(result.Buffer);
+ out.Write(buffer);
} else {
- if (result.Status == NKikimrProto::NODATA) {
+ if (status == NKikimrProto::NODATA) {
out << "HTTP/1.1 204 No Content\r\n";
} else {
out << "HTTP/1.1 500 Error\r\n";
@@ -172,7 +129,7 @@ namespace {
<< "\r\n";
out << "{\n"
- << " \"Status\": \"" << NKikimrProto::EReplyStatus_Name(result.Status) << "\"\n"
+ << " \"Status\": \"" << NKikimrProto::EReplyStatus_Name(status) << "\"\n"
<< "}\n";
}
} else {
@@ -204,14 +161,14 @@ namespace {
DIV() {
out << "LogoBlobId: ";
STRONG() {
- out << logoBlobId.ToString();
+ out << id.ToString();
}
}
DIV() {
out << "Status: ";
STRONG() {
- out << NKikimrProto::EReplyStatus_Name(result.Status);
+ out << NKikimrProto::EReplyStatus_Name(status);
}
}
@@ -220,7 +177,7 @@ namespace {
out << "Debug Info:";
DIV() {
out << "<pre><small>";
- out << result.DebugInfo;
+ out << msg.DebugInfo;
out << "</small></pre>";
}
}
@@ -241,27 +198,29 @@ namespace {
}
}
TABLEBODY() {
- for (const auto& item : result.PartMap) {
- auto prefix = [&] {
- TABLED() { out << item.DiskOrderNumber; }
- TABLED() { out << item.PartIdRequested; }
- TABLED() { out << item.RequestIndex; }
- TABLED() { out << item.ResponseIndex; }
- };
- if (item.Status) {
- for (const auto& x : item.Status) {
+ if (msg.ResponseSz == 1) {
+ for (const auto& item : msg.Responses[0].PartMap) {
+ auto prefix = [&] {
+ TABLED() { out << item.DiskOrderNumber; }
+ TABLED() { out << item.PartIdRequested; }
+ TABLED() { out << item.RequestIndex; }
+ TABLED() { out << item.ResponseIndex; }
+ };
+ if (item.Status) {
+ for (const auto& x : item.Status) {
+ TABLER() {
+ prefix();
+ TABLED() { out << x.first; }
+ TABLED() { out << NKikimrProto::EReplyStatus_Name(x.second); }
+ }
+ }
+ } else {
TABLER() {
prefix();
- TABLED() { out << x.first; }
- TABLED() { out << NKikimrProto::EReplyStatus_Name(x.second); }
+ TABLED() { out << "-"; }
+ TABLED() { out << "-"; }
}
}
- } else {
- TABLER() {
- prefix();
- TABLED() { out << "-"; }
- TABLED() { out << "-"; }
- }
}
}
}
@@ -269,16 +228,15 @@ namespace {
}
}
- if (result.Status == NKikimrProto::OK) {
+ if (status == NKikimrProto::OK) {
DIV() {
TCgiParameters params;
- params.InsertEscaped("blob", logoBlobId.ToString());
+ params.InsertEscaped("blob", id.ToString());
params.InsertEscaped("groupId", ToString(groupId));
params.InsertEscaped("binary", "1");
out << "<a href=\"?" << params() << "\">Data</a>";
DIV() {
out << "<pre><small>";
- const TString& buffer = result.Buffer;
const size_t rowSize = 64;
for (size_t offset = 0; offset < buffer.size(); offset += rowSize) {
out << Sprintf("0x%06zx | ", offset);
@@ -319,13 +277,21 @@ namespace {
}
}
}
+
+ Send(sender, new NMon::TEvHttpInfoRes(out.Str(), subRequestId, NMon::TEvHttpInfoRes::Custom), 0, senderCookie);
+ RequestsInFlight.erase(it);
}
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(NMon::TEvHttpInfo, Handle);
+ hFunc(TEvBlobStorage::TEvGetResult, Handle);
+ )
};
} // anon
-NMonitoring::IMonPage *CreateMonGetBlobPage(const TString& path, TActorSystem *actorSystem) {
- return new TMonGetBlobPage(path, actorSystem);
+IActor *CreateMonGetBlobActor() {
+ return new TMonGetBlobActor;
}
} // NKikimr
diff --git a/ydb/core/blobstorage/other/mon_get_blob_page.h b/ydb/core/blobstorage/other/mon_get_blob_page.h
index d822023ab7..3c5ca87234 100644
--- a/ydb/core/blobstorage/other/mon_get_blob_page.h
+++ b/ydb/core/blobstorage/other/mon_get_blob_page.h
@@ -6,6 +6,10 @@
namespace NKikimr {
- NMonitoring::IMonPage *CreateMonGetBlobPage(const TString& path, TActorSystem *actorSystem);
+ inline TActorId MakeMonGetBlobId() {
+ return TActorId(0, TStringBuf("get_blob_mon", 12));
+ }
+
+ IActor *CreateMonGetBlobActor();
} // NKikimr
diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp
index c088c6a6b9..b8b99c8ecc 100644
--- a/ydb/core/driver_lib/run/run.cpp
+++ b/ydb/core/driver_lib/run/run.cpp
@@ -1219,6 +1219,8 @@ void TKikimrRunner::InitializeActorSystem(
TMailboxType::HTSwap, AppData->SystemPoolId));
setup->LocalServices.emplace_back(MakeMonVDiskStreamId(), TActorSetupCmd(CreateMonVDiskStreamActor(),
TMailboxType::HTSwap, AppData->SystemPoolId));
+ setup->LocalServices.emplace_back(MakeMonGetBlobId(), TActorSetupCmd(CreateMonGetBlobActor(),
+ TMailboxType::HTSwap, AppData->SystemPoolId));
}
ApplyLogSettings(runConfig);
@@ -1266,7 +1268,14 @@ void TKikimrRunner::InitializeActorSystem(
MakeBlobStorageFailureInjectionID(runConfig.NodeId));
}
- Monitoring->Register(CreateMonGetBlobPage("get_blob", ActorSystem.Get()));
+ Monitoring->RegisterActorPage(
+ nullptr,
+ "get_blob",
+ TString(),
+ false,
+ ActorSystem.Get(),
+ MakeMonGetBlobId());
+
Monitoring->Register(CreateMonBlobRangePage("blob_range", ActorSystem.Get()));
Monitoring->RegisterActorPage(